2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 1998-2005 Ivo Timmermans,
4 2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5 2010 Timothy Redaelli <timothy@redaelli.eu>
6 2010 Brandon Black <blblack@gmail.com>
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
/* Project header; the other includes of this file are not visible in this excerpt. */
35 #include "connection.h"
/* Larger of two values. Function-like macro: an argument may be evaluated twice,
   so do not pass expressions with side effects. */
52 #define MAX(a, b) ((a) > (b) ? (a) : (b))
/* Work memory handed to the LZO compressors in compress_packet(); sized to the
   larger of the two LZO work-memory constants. */
57 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
/* Forward declaration: send_udppacket() is called by udp_probe_h() before it is defined. */
60 static void send_udppacket(node_t *, vpn_packet_t *);
/* Replay window size. receive_udppacket() uses it as the byte length of n->late
   and tracks replaywin * 8 sequence numbers. */
62 unsigned replaywin = 16;
/* When true, try_udp() also sends a probe with status.send_locally set. */
63 bool localdiscovery = true;
/* When true, try_mtu() checks status.udp_confirmed before probing. */
64 bool udp_discovery = true;
/* Compared against elapsed whole seconds (tv_sec) in try_udp(). */
65 int udp_discovery_interval = 9;
/* Used as the tv_sec of the udp_ping_timeout timer armed in udp_probe_h(). */
66 int udp_discovery_timeout = 30;
/* Threshold (2^30) that receive_udppacket() compares n->received_seqno against. */
68 #define MAX_SEQNO 1073741824
/* Fix the MTU of node n once probing cannot narrow it any further.
   The visible condition triggers when n->mtuprobes has reached 20 or when
   n->minmtu has caught up with (or passed) n->maxmtu. In that case minmtu and
   maxmtu are made equal and the resulting n->mtu is logged.
   NOTE(review): lines between the statements below are not visible in this
   excerpt (the two assignments may be alternatives of an if/else, and the
   assignment of n->mtu itself is not shown) -- confirm against the full file. */
70 static void try_fix_mtu(node_t *n) {
74 if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
75 if(n->minmtu > n->maxmtu)
76 n->minmtu = n->maxmtu;
78 n->maxmtu = n->minmtu;
80 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
/* Timer callback armed by udp_probe_h() through timeout_add() with the node as
   its data argument. The visible lines test n->status.udp_confirmed, log that
   too much time has passed since the last UDP ping response, and clear
   udp_confirmed.
   NOTE(review): the declaration of n (presumably derived from data) and the
   statements after the first test are not visible here. */
85 static void udp_probe_timeout_handler(void *data) {
87 if(!n->status.udp_confirmed)
90 logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
91 n->status.udp_confirmed = false;
/* Handle a UDP probe packet received from node n.
   n      - node the probe came from
   packet - the probe; its first data byte selects the kind (0 is a request,
            2 is a reply that carries extra fields)
   len    - probe length supplied by the caller
   Request (first byte 0): a reply is sent back with send_udppacket() while
   n->status.udp_confirmed is temporarily forced to true. For peers whose
   (options >> 24) is at least 3 the reply payload gets the length as a 16-bit
   network-order value followed by the current time as two 32-bit network-order
   values (seconds, microseconds).
   Reply: for type 2 the probe length is read from offset 1. udp_confirmed is
   set, the udp_ping_timeout timer is re-armed with udp_discovery_timeout
   seconds, a reply of at least maxmtu + 1 bytes is logged as a PMTU increase,
   minmtu may be raised (capped at maxmtu), and the RTT is derived from
   n->probe_time when probe_counter is 1.
   NOTE(review): several lines (branch structure, returns, probe_counter
   updates) are not visible in this excerpt; the summary covers only what is
   shown. */
97 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
98 if(!DATA(packet)[0]) {
99 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
101 /* It's a probe request, send back a reply */
103 /* Type 2 probe replies were introduced in protocol 17.3 */
104 if ((n->options >> 24) >= 3) {
105 uint8_t *data = DATA(packet);
107 uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
109 gettimeofday(&now, NULL);
110 uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
111 uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
114 /* Legacy protocol: n won't understand type 2 probe replies. */
118 /* Temporarily set udp_confirmed, so that the reply is sent
119 back exactly the way it came in. */
121 bool udp_confirmed = n->status.udp_confirmed;
122 n->status.udp_confirmed = true;
123 send_udppacket(n, packet);
124 n->status.udp_confirmed = udp_confirmed;
126 length_t probelen = len;
127 if (DATA(packet)[0] == 2) {
129 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
131 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
134 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
136 /* It's a valid reply: now we know bidirectional communication
137 is possible using the address and socket that the reply
139 n->status.udp_confirmed = true;
142 timeout_del(&n->udp_ping_timeout);
143 timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
146 if(probelen >= n->maxmtu + 1) {
147 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
149 /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */
154 /* If applicable, raise the minimum supported MTU */
156 if(probelen > n->maxmtu)
157 probelen = n->maxmtu;
158 if(n->minmtu < probelen) {
159 n->minmtu = probelen;
164 The RTT is the time between the MTU probe burst was sent and the first
168 struct timeval now, diff;
169 gettimeofday(&now, NULL);
170 timersub(&now, &n->probe_time, &diff);
172 struct timeval probe_timestamp = now;
173 if (DATA(packet)[0] == 2 && packet->len >= 11) {
174 uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
175 uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
176 probe_timestamp.tv_sec = ntohl(sec);
177 probe_timestamp.tv_usec = ntohl(usec);
182 if(n->probe_counter == 1) {
183 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
184 n->probe_time = probe_timestamp;
185 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
/* Compress len bytes from source into dest using the method selected by level.
   Visible branches:
     - a plain memcpy() of the input (its guarding condition is not visible;
       presumably the "no compression" level -- confirm),
     - level == 10: lzo1x_1_compress() with lzo_wrkmem as work memory,
     - level < 10:  zlib compress2() at that level,
     - otherwise:   lzo1x_999_compress() with lzo_wrkmem.
   In every compressing branch the output capacity is initialised to MAXSIZE.
   NOTE(review): the return statements and any #ifdef guards are not visible in
   this excerpt, so the returned value on success/failure is not documented. */
190 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
192 memcpy(dest, source, len);
194 } else if(level == 10) {
196 lzo_uint lzolen = MAXSIZE;
197 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
202 } else if(level < 10) {
204 unsigned long destlen = MAXSIZE;
205 if(compress2(dest, &destlen, source, len, level) == Z_OK)
212 lzo_uint lzolen = MAXSIZE;
213 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
/* Decompress len bytes from source into dest using the method selected by level.
   Visible branches:
     - a plain memcpy() of the input (guarding condition not visible;
       presumably the "no compression" level -- confirm),
     - level > 9: lzo1x_decompress_safe(), success when it returns LZO_E_OK,
     - otherwise: zlib uncompress(), success when it returns Z_OK.
   The output capacity is initialised to MAXSIZE in both decompressing branches.
   NOTE(review): the return statements are not visible in this excerpt. */
223 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
225 memcpy(dest, source, len);
227 } else if(level > 9) {
229 lzo_uint lzolen = MAXSIZE;
230 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
238 unsigned long destlen = MAXSIZE;
239 if(uncompress(dest, &destlen, source, len) == Z_OK)
/* Account for a packet received from node n: logs its size and adds
   packet->len to n->in_bytes.
   NOTE(review): the remaining statements of this function (what is done with
   the packet afterwards) are not visible in this excerpt. */
251 static void receive_packet(node_t *n, vpn_packet_t *packet) {
252 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
253 packet->len, n->name, n->hostname);
256 n->in_bytes += packet->len;
/* Check whether inpkt authenticates as having been sent by node n.
   Visible paths:
     - SPTPS: the result of sptps_verify_datagram() on the packet data,
     - legacy: requires an active input digest and a packet at least as long as
       a sequence number plus the digest, then digest_verify() over everything
       except the trailing digest, against that trailing digest.
   NOTE(review): the condition selecting the SPTPS path, the body of the
   DISABLE_LEGACY branch and the early returns are not visible here. */
261 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
263 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
265 #ifdef DISABLE_LEGACY
268 if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
271 return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
/* Process a UDP packet inpkt that was attributed to node n.
   SPTPS nodes: if the SPTPS state is not set up yet the packet is only logged;
   otherwise the two leading node IDs are skipped and the rest is handed to
   sptps_receive_data().
   Legacy nodes, in the order shown below:
     1. require n->status.validkey,
     2. require room for a sequence number plus digest,
     3. verify the message authentication code when a digest is active,
     4. decrypt into one of the local scratch packets when a cipher is active,
     5. read the network-order sequence number and apply the replay window
        (n->late is a bitmap of replaywin bytes; far-future and late/replayed
        packets are logged),
     6. decompress when n->incompression is set,
     7. pass the packet to udp_probe_h() when data bytes 12 and 13 are both
        zero, otherwise to receive_packet().
   NOTE(review): return statements, several else branches and local
   declarations (nextpkt, outlen, seqno) are not visible in this excerpt, so
   the exact success/failure return values are not documented here. */
275 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
276 vpn_packet_t pkt1, pkt2;
277 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
280 pkt1.offset = DEFAULT_PACKET_OFFSET;
281 pkt2.offset = DEFAULT_PACKET_OFFSET;
283 if(n->status.sptps) {
284 if(!n->sptps.state) {
285 if(!n->status.waitingforkey) {
286 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
289 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
293 inpkt->offset += 2 * sizeof(node_id_t);
294 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
295 logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
301 #ifdef DISABLE_LEGACY
304 if(!n->status.validkey) {
305 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
309 /* Check packet length */
311 if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
312 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
313 n->name, n->hostname);
317 /* It's a legacy UDP packet, the data starts after the seqno */
319 inpkt->offset += sizeof(seqno_t);
321 /* Check the message authentication code */
323 if(digest_active(n->indigest)) {
324 inpkt->len -= digest_length(n->indigest);
325 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
326 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
330 /* Decrypt the packet */
332 if(cipher_active(n->incipher)) {
333 vpn_packet_t *outpkt = pkt[nextpkt++];
336 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
337 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
341 outpkt->len = outlen;
345 /* Check the sequence number */
348 memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
349 seqno = ntohl(seqno);
350 inpkt->len -= sizeof seqno;
353 if(seqno != n->received_seqno + 1) {
354 if(seqno >= n->received_seqno + replaywin * 8) {
355 if(n->farfuture++ < replaywin >> 2) {
356 logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
357 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
360 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
361 seqno - n->received_seqno - 1, n->name, n->hostname);
362 memset(n->late, 0, replaywin);
363 } else if (seqno <= n->received_seqno) {
364 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
365 logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
366 n->name, n->hostname, seqno, n->received_seqno);
370 for(int i = n->received_seqno + 1; i < seqno; i++)
371 n->late[(i / 8) % replaywin] |= 1 << i % 8;
376 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
379 if(seqno > n->received_seqno)
380 n->received_seqno = seqno;
384 if(n->received_seqno > MAX_SEQNO)
387 /* Decompress the packet */
389 length_t origlen = inpkt->len;
391 if(n->incompression) {
392 vpn_packet_t *outpkt = pkt[nextpkt++];
394 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
395 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
396 n->name, n->hostname);
402 origlen -= MTU/64 + 20;
407 if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
408 udp_probe_h(n, inpkt, origlen);
410 receive_packet(n, inpkt);
/* Handle a VPN packet that arrived over the meta connection c.
   c      - connection the data arrived on; c->node is credited as the sender
   buffer - raw packet bytes
   len    - number of bytes in buffer; compared against the space left in the
            local packet (sizeof data minus offset) before copying
   The bytes are copied into a local packet whose priority is set to -1, and the
   packet is passed to receive_packet().
   NOTE(review): the action taken when len is too large, the effect of the
   OPTION_TCPONLY test and the assignment of the packet length are on lines
   not visible in this excerpt. */
415 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
417 outpkt.offset = DEFAULT_PACKET_OFFSET;
419 if(len > sizeof outpkt.data - outpkt.offset)
423 if(c->options & OPTION_TCPONLY)
426 outpkt.priority = -1;
427 memcpy(DATA(&outpkt), buffer, len);
429 receive_packet(c->node, &outpkt);
/* Send origpkt to node n using SPTPS.
   Visible steps:
     - a guard on "no valid key and no direct connection",
     - packets whose data bytes 12 and 13 are both zero are sent as a
       PKT_PROBE record,
     - a header offset is chosen depending on routing_mode and the packet must
       be at least that long,
     - when n->outcompression is set the payload after the offset is
       compressed; the compressed form is used (and PKT_COMPRESSED set) only
       if it is shorter than the original payload,
     - with a direct connection and a packet larger than n->minmtu the packet
       goes out through send_tcppacket(); the final line sends it as an SPTPS
       record.
   NOTE(review): declarations of type, offset and outpkt, the early returns and
   the else keywords are not visible in this excerpt. */
432 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
433 if(!n->status.validkey && !n->connection)
439 if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
440 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
444 if(routing_mode == RMODE_ROUTER)
449 if(origpkt->len < offset)
454 if(n->outcompression) {
456 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
458 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
459 } else if(len < origpkt->len - offset) {
460 outpkt.len = len + offset;
462 type |= PKT_COMPRESSED;
466 /* If we have a direct metaconnection to n, and we can't use UDP, then
467 don't bother with SPTPS and just use a "plaintext" PACKET message.
468 We don't really care about end-to-end security since we're not
469 sending the message through any intermediate nodes. */
470 if(n->connection && origpkt->len > n->minmtu)
471 send_tcppacket(n->connection, origpkt);
473 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
/* Make *sock refer to a listening socket whose address family matches sa.
   sa   - destination address
   sock - in/out index into listen_socket[]
   If the currently selected socket has a different family, the listening
   sockets are scanned in order for one with the same family as sa.
   NOTE(review): the statement executed on a match (presumably assigning the
   index to *sock and stopping) is not visible in this excerpt. */
477 static void adapt_socket(const sockaddr_t *sa, int *sock) {
478 /* Make sure we have a suitable socket for the chosen address */
479 if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
480 for(int i = 0; i < listen_sockets; i++) {
481 if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
/* Pick the address and listening socket to use for sending UDP to node n.
   n    - destination node
   sa   - out: chosen address
   sock - out: index into listen_socket[]
   Visible fallback path: a random index below n->edge_tree->count selects an
   edge, the address of its reverse edge becomes the candidate, a random
   listening socket is chosen, and adapt_socket() fixes up the address family.
   NOTE(review): the defaults assigned at the top, the early return for a
   confirmed address, the "every third packet" logic and the loop body are only
   partly visible; rand() % count would divide by zero for an empty edge tree
   unless a guard exists on a line not shown -- confirm. */
489 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
494 /* If the UDP address is confirmed, use it. */
495 if(n->status.udp_confirmed)
498 /* Send every third packet to n->address; that could be set
499 to the node's reflexive UDP address discovered during key
508 /* Otherwise, addresses are found in edges to this node.
509 So we pick a random edge and a random socket. */
512 int j = rand() % n->edge_tree->count;
513 edge_t *candidate = NULL;
515 for splay_each(edge_t, e, n->edge_tree) {
517 candidate = e->reverse;
523 *sa = &candidate->address;
524 *sock = rand() % listen_sockets;
527 adapt_socket(*sa, sock);
/* Pick a local address of node n to send UDP to.
   n    - destination node
   sa   - out: chosen address; written only when a usable candidate is found
          on the visible path
   sock - out: index into listen_socket[]
   A random index below n->edge_tree->count selects an edge; if that candidate
   has a local address with a non-zero address family, it is used together with
   a random listening socket adjusted by adapt_socket().
   NOTE(review): the loop body selecting the candidate and any default
   assignment to *sa are not visible in this excerpt. */
530 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
533 /* Pick one of the edges from this node at random, then use its local address. */
536 int j = rand() % n->edge_tree->count;
537 edge_t *candidate = NULL;
539 for splay_each(edge_t, e, n->edge_tree) {
546 if (candidate && candidate->local_address.sa.sa_family) {
547 *sa = &candidate->local_address;
548 *sock = rand() % listen_sockets;
549 adapt_socket(*sa, sock);
/* Send origpkt to node n over UDP.
   Visible steps, in order:
     - unreachable nodes are only logged,
     - SPTPS nodes are delegated to send_sptps_packet(),
     - without a valid key the packet is forwarded with send_tcppacket() via
       n->nexthop,
     - with PMTU discovery enabled, non-probe packets larger than n->minmtu are
       forwarded via n->nexthop (send_packet() or send_tcppacket()),
     - optional compression, then the network-order sequence number is written
       in front of the data, optional encryption, optional MAC appended,
     - the destination is chosen with choose_local_address() or
       choose_udp_address(), the IP_TOS socket option is updated when the
       packet priority changed, and the packet is sent with sendto(),
     - on a "message too large" error maxmtu and mtu are lowered below the
       original length; other errors are logged,
     - origpkt->len is restored to its original value.
   pkt1/pkt2 are scratch packets used alternately as output of each stage.
   NOTE(review): declarations (nextpkt, outlen, sock), returns, goto targets
   and else keywords are on lines not visible in this excerpt. */
553 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
554 vpn_packet_t pkt1, pkt2;
555 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
556 vpn_packet_t *inpkt = origpkt;
558 vpn_packet_t *outpkt;
559 int origlen = origpkt->len;
561 #if defined(SOL_IP) && defined(IP_TOS)
562 static int priority = 0;
563 int origpriority = origpkt->priority;
566 pkt1.offset = DEFAULT_PACKET_OFFSET;
567 pkt2.offset = DEFAULT_PACKET_OFFSET;
569 if(!n->status.reachable) {
570 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
575 return send_sptps_packet(n, origpkt);
577 #ifdef DISABLE_LEGACY
580 /* Make sure we have a valid key */
582 if(!n->status.validkey) {
583 logger(DEBUG_TRAFFIC, LOG_INFO,
584 "No valid key known yet for %s (%s), forwarding via TCP",
585 n->name, n->hostname);
586 send_tcppacket(n->nexthop->connection, origpkt);
590 if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
591 logger(DEBUG_TRAFFIC, LOG_INFO,
592 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
593 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
596 send_packet(n->nexthop, origpkt);
598 send_tcppacket(n->nexthop->connection, origpkt);
603 /* Compress the packet */
605 if(n->outcompression) {
606 outpkt = pkt[nextpkt++];
608 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
609 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
610 n->name, n->hostname);
617 /* Add sequence number */
619 seqno_t seqno = htonl(++(n->sent_seqno));
620 memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
621 inpkt->len += sizeof seqno;
623 /* Encrypt the packet */
625 if(cipher_active(n->outcipher)) {
626 outpkt = pkt[nextpkt++];
629 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
630 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
634 outpkt->len = outlen;
638 /* Add the message authentication code */
640 if(digest_active(n->outdigest)) {
641 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
642 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
646 inpkt->len += digest_length(n->outdigest);
649 /* Send the packet */
651 const sockaddr_t *sa = NULL;
654 if(n->status.send_locally)
655 choose_local_address(n, &sa, &sock);
657 choose_udp_address(n, &sa, &sock);
659 #if defined(SOL_IP) && defined(IP_TOS)
660 if(priorityinheritance && origpriority != priority
661 && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
662 priority = origpriority;
663 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
664 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
665 logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
669 if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
670 if(sockmsgsize(sockerrno)) {
671 if(n->maxmtu >= origlen)
672 n->maxmtu = origlen - 1;
673 if(n->mtu >= origlen)
674 n->mtu = origlen - 1;
677 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
681 origpkt->len = origlen;
/* Transmit an SPTPS datagram of len bytes from node "from" to node "to".
   to   - final destination
   from - originating node (myself for locally generated data)
   type - SPTPS record type (SPTPS_HANDSHAKE, PKT_PROBE, or data flags)
   data - SPTPS datagram bytes
   len  - number of bytes in data
   A relay is selected first: to->via when that is not myself and the packet is
   a probe or fits its minmtu, otherwise to->nexthop.
   TCP path (handshakes, TCPOnly, relay lacking relay support, or non-probe
   data larger than the relay's minmtu): the data is base64-encoded and sent
   through the meta connection as an ANS_KEY request when no valid key is
   known yet for a locally originated packet, or as a REQ_KEY/REQ_SPTPS
   request.
   UDP path: when the relay supports it, destination and source node IDs are
   prepended (a null destination ID marks a direct packet), the payload is
   copied after them and sent with sendto(). On a "message too large" error
   the relay's maxmtu and mtu are lowered below the payload length, excluding
   SPTPS overhead.
   NOTE(review): the declarations of overhead and sock, the branch choosing
   between the null ID and to->id, and the return statements of the UDP path
   are on lines not visible in this excerpt. Both buffers are variable-length
   arrays sized from len. */
685 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
686 node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
687 bool direct = from == myself && to == relay;
688 bool relay_supported = (relay->options >> 24) >= 4;
689 bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
691 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
692 TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
693 This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
695 if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
696 char buf[len * 4 / 3 + 5];
697 b64encode(data, buf, len);
698 /* If no valid key is known yet, send the packets using ANS_KEY requests,
699 to ensure we get to learn the reflexive UDP address. */
700 if(from == myself && !to->status.validkey) {
701 to->incompression = myself->incompression;
702 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
704 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
709 if(relay_supported) overhead += sizeof to->id + sizeof from->id;
710 char buf[len + overhead]; char* buf_ptr = buf;
711 if(relay_supported) {
713 /* Inform the recipient that this packet was sent directly. */
714 node_id_t nullid = {};
715 memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
717 memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
719 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
722 /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
723 memcpy(buf_ptr, data, len); buf_ptr += len;
725 const sockaddr_t *sa = NULL;
727 if(relay->status.send_locally)
728 choose_local_address(relay, &sa, &sock);
730 choose_udp_address(relay, &sa, &sock);
731 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
732 if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
733 if(sockmsgsize(sockerrno)) {
734 // Compensate for SPTPS overhead
735 len -= SPTPS_DATAGRAM_OVERHEAD;
736 if(relay->maxmtu >= len)
737 relay->maxmtu = len - 1;
738 if(relay->mtu >= len)
739 relay->mtu = len - 1;
742 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
/* Send-data entry point with an opaque handle argument: forwards to
   send_sptps_data_priv() with handle as the destination node and myself as the
   originating node, and returns its result.
   handle - destination node, passed as an untyped pointer
   type   - SPTPS record type
   data   - bytes to send
   len    - number of bytes in data */
750 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
751 return send_sptps_data_priv(handle, myself, type, data, len);
/* Handle one decoded SPTPS record received from a node.
   handle - the sending node, passed as an untyped pointer
   type   - record type: SPTPS_HANDSHAKE, PKT_PROBE, or a combination of
            PKT_COMPRESSED and PKT_MAC flags
   data   - record payload
   len    - payload length in bytes
   Visible behaviour:
     - a handshake record marks the node's key as valid and clears
       waitingforkey,
     - oversized records are logged with the MTU limit,
     - PKT_PROBE payloads are copied into a local packet and given to
       udp_probe_h(),
     - any other flag bit is rejected as an unexpected record type,
     - the presence of the MAC header must match routing_mode,
     - the payload is placed at offset 0 (with MAC header) or 14 (without),
       after decompression when PKT_COMPRESSED is set,
     - the Ethernet type bytes 12/13 are derived from the IP version nibble at
       byte 14 (0x0800 or 0x86DD), unknown versions are logged,
     - the packet is finally passed to receive_packet().
   NOTE(review): return statements, the size check condition, the declaration
   of inpkt and the case labels of the switch are not visible in this excerpt. */
754 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
755 node_t *from = handle;
757 if(type == SPTPS_HANDSHAKE) {
758 if(!from->status.validkey) {
759 from->status.validkey = true;
760 from->status.waitingforkey = false;
761 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
767 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
772 inpkt.offset = DEFAULT_PACKET_OFFSET;
774 if(type == PKT_PROBE) {
776 memcpy(DATA(&inpkt), data, len);
777 udp_probe_h(from, &inpkt, len);
781 if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
782 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
786 /* Check if we have the headers we need */
787 if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
788 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
790 } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
791 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
794 int offset = (type & PKT_MAC) ? 0 : 14;
795 if(type & PKT_COMPRESSED) {
796 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
800 inpkt.len = ulen + offset;
802 if(inpkt.len > MAXSIZE)
805 memcpy(DATA(&inpkt) + offset, data, len);
806 inpkt.len = len + offset;
809 /* Generate the Ethernet packet type if necessary */
811 switch(DATA(&inpkt)[14] >> 4) {
813 DATA(&inpkt)[12] = 0x08;
814 DATA(&inpkt)[13] = 0x00;
817 DATA(&inpkt)[12] = 0x86;
818 DATA(&inpkt)[13] = 0xDD;
821 logger(DEBUG_TRAFFIC, LOG_ERR,
822 "Unknown IP version %d while reading packet from %s (%s)",
823 DATA(&inpkt)[14] >> 4, from->name, from->hostname);
828 receive_packet(from, &inpkt);
832 // This function tries to get SPTPS keys, if they aren't already known.
833 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
// Visible behaviour: the function checks n->status.validkey first (nodes with a valid key need no key request).
// Otherwise it logs, and if a key request has been outstanding for more than 10 seconds
// (n->last_req_key + 10 < now.tv_sec) SPTPS is stopped and waitingforkey is cleared.
// NOTE(review): the statements that actually issue the key request are not visible in this excerpt.
834 static void try_sptps(node_t *n) {
835 if(n->status.validkey)
838 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
840 if(!n->status.waitingforkey)
842 else if(n->last_req_key + 10 < now.tv_sec) {
843 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
844 sptps_stop(&n->sptps);
845 n->status.waitingforkey = false;
/* Build and send a UDP probe request of len bytes to node n.
   The first 14 data bytes are zeroed (receivers recognise probes by zero bytes
   at offsets 12 and 13, and a zero first byte marks a request), the remaining
   len - 14 bytes are filled by randomize(), and the packet goes out through
   send_udppacket().
   NOTE(review): len - 14 is passed to randomize() without a visible lower
   bound check; the visible callers pass at least 16. The assignments of the
   packet length and priority are on lines not visible in this excerpt. */
852 static void send_udp_probe_packet(node_t *n, int len) {
854 packet.offset = DEFAULT_PACKET_OFFSET;
855 memset(DATA(&packet), 0, 14);
856 randomize(DATA(&packet) + 14, len - 14);
860 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
862 send_udppacket(n, &packet);
865 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
866 // If a tunnel is already established, it makes sure it stays up.
867 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
// Visible behaviour: once at least udp_discovery_interval whole seconds have passed since n->udp_ping_sent,
// a probe of MAX(n->minmtu, 16) bytes is sent and the timestamp is refreshed. If localdiscovery is enabled,
// UDP is not yet confirmed and n->prevedge is set, a second 16-byte probe is sent with status.send_locally
// temporarily set.
// NOTE(review): any early-return guard at the top of the function is not visible in this excerpt.
868 static void try_udp(node_t* n) {
873 gettimeofday(&now, NULL);
874 struct timeval ping_tx_elapsed;
875 timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
877 if(ping_tx_elapsed.tv_sec >= udp_discovery_interval) {
878 send_udp_probe_packet(n, MAX(n->minmtu, 16));
879 n->udp_ping_sent = now;
881 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
882 n->status.send_locally = true;
883 send_udp_probe_packet(n, 16);
884 n->status.send_locally = false;
/* Estimate an initial maximum tinc MTU for node n from the operating system.
   A temporary UDP socket is connected to the address chosen by
   choose_udp_address() and queried with getsockopt(IP_MTU). The IP header size
   (IPv6 or IPv4, by address family) is subtracted, and for SPTPS nodes the
   SPTPS datagram overhead plus, for peers with (options >> 24) >= 4, two node
   IDs. Failures of socket(), connect() and getsockopt(), and an absurdly small
   result, are logged.
   NOTE(review): the fallback return values, the closing of the temporary
   socket, the overhead subtracted for non-SPTPS nodes and any platform #ifdef
   guards are on lines not visible in this excerpt. */
889 static length_t choose_initial_maxmtu(node_t *n) {
894 const sockaddr_t *sa = NULL;
896 choose_udp_address(n, &sa, &sockindex);
900 sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
902 logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
906 if(connect(sock, &sa->sa, SALEN(sa->sa))) {
907 logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
913 socklen_t ip_mtu_len = sizeof ip_mtu;
914 if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
915 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
922 /* getsockopt(IP_MTU) returns the MTU of the physical interface.
923 We need to remove various overheads to get to the tinc MTU. */
924 length_t mtu = ip_mtu;
925 mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip);
927 if(n->status.sptps) {
928 mtu -= SPTPS_DATAGRAM_OVERHEAD;
929 if((n->options >> 24) >= 4)
930 mtu -= sizeof(node_id_t) + sizeof(node_id_t);
934 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
940 logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
950 // This function tries to determine the MTU of a node.
951 // By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
952 // If the MTU is already fixed, this function checks if it can be increased.
// The function first checks OPTION_PMTU_DISCOVERY and, when udp_discovery is enabled, udp_confirmed.
// Probe sizes during initial discovery are minmtu + interval^(multiplier * cycle_position / (probes_per_cycle - 1)),
// where interval is maxmtu minus MAX(minmtu, 512). The function also refreshes n->packetloss at the end.
// NOTE(review): early returns, the calls that advance n->mtuprobes and the call to try_fix_mtu() are on lines
// not visible in this excerpt.
953 static void try_mtu(node_t *n) {
954 if(!(n->options & OPTION_PMTU_DISCOVERY))
957 if(udp_discovery && !n->status.udp_confirmed) {
964 /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
965 mtuprobes == 20: fix MTU, and go to -1
966 mtuprobes == -1: send one >maxmtu probe every pingtimeout */
969 gettimeofday(&now, NULL);
970 struct timeval elapsed;
971 timersub(&now, &n->probe_sent_time, &elapsed);
972 if(n->mtuprobes >= 0) {
973 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
976 if(elapsed.tv_sec < pingtimeout)
983 if(n->mtuprobes < 0) {
984 /* After the initial discovery, we only send one >maxmtu probe
985 to detect PMTU increases. */
986 if(n->maxmtu + 1 < MTU)
987 send_udp_probe_packet(n, n->maxmtu + 1);
989 /* Before initial discovery begins, set maxmtu to the most likely value.
990 If it's underestimated, we will correct it after initial discovery. */
991 if(n->mtuprobes == 0)
992 n->maxmtu = choose_initial_maxmtu(n);
994 /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
995 but it will typically increase convergence time in the no-loss case. */
996 const length_t probes_per_cycle = 8;
998 /* This magic value was determined using math simulations.
999 It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
1000 Since 1407 is just below the range of tinc MTUs over typical networks,
1001 this fine-tuning allows tinc to cover a lot of ground very quickly.
1002 This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
1003 then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
1004 if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
1005 const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
1007 const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
1008 const length_t minmtu = MAX(n->minmtu, 512);
1009 const float interval = n->maxmtu - minmtu;
1011 /* The core of the discovery algorithm is this exponential.
1012 It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
1013 This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
1014 are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
1015 on the precise MTU as we are approaching it.
1016 The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one
1017 reply per cycle so that we can make progress. */
1018 const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
1020 send_udp_probe_packet(n, minmtu + offset);
1021 if(n->mtuprobes >= 0)
1025 n->probe_counter = 0;
1026 n->probe_sent_time = now;
1027 n->probe_time = now;
1029 /* Calculate the packet loss of incoming traffic by comparing the rate of
1030 packets received to the rate with which the sequence number has increased.
1031 TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
1034 if(n->received > n->prev_received)
1035 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
1037 n->packetloss = n->received_seqno <= n->prev_received_seqno;
1039 n->prev_received_seqno = n->received_seqno;
1040 n->prev_received = n->received;
1043 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
1044 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
1045 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.
1046 // By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment.
1047 // It is recommended to call this function every time a packet is sent (or intended to be sent) to a node,
1048 // so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle.
// Visible behaviour: the hop worked on ("via") is n->nexthop when n->via is myself, otherwise n->via.
// For legacy nodes, via->last_req_key is refreshed at most once every 10 seconds when via has no valid key.
// For SPTPS nodes whose via is not UDP-confirmed, the function recurses into via->nexthop when that hop
// differs from via and has (options >> 24) >= 4.
// NOTE(review): the calls made in each branch and the early returns are on lines not visible in this excerpt.
1049 static void try_tx(node_t *n) {
1050 /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
1051 messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */
1052 if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) {
1054 if (!n->status.validkey)
1058 node_t *via = (n->via == myself) ? n->nexthop : n->via;
1060 if((myself->options | via->options) & OPTION_TCPONLY)
1063 if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
1065 via->last_req_key = now.tv_sec;
1066 } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) {
1071 /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
1072 if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)
1073 try_tx(via->nexthop);
1077 send a packet to the given vpn ip.
/* Send packet to node n.
   Visible behaviour:
     - one path copies mymac into the start of the packet data, adds the
       packet length to n->out_bytes and writes the packet to the local device
       through devops.write() (its guarding condition is not visible;
       presumably n == myself -- confirm),
     - unreachable nodes are logged and not sent to,
     - n->out_bytes is increased by the packet length,
     - SPTPS nodes are handled by send_sptps_packet(),
     - otherwise the next hop "via" is n->nexthop when the packet priority is
       -1 or n->via is myself, else n->via; the packet goes over TCP when the
       priority is -1 or TCPOnly is in effect (the connection is terminated if
       that fails), else over UDP with send_udppacket().
   NOTE(review): returns, the declaration of via and the statement that follows
   the final comment are on lines not visible in this excerpt. */
1079 void send_packet(node_t *n, vpn_packet_t *packet) {
1084 memcpy(DATA(packet), mymac.x, ETH_ALEN);
1086 n->out_bytes += packet->len;
1087 devops.write(packet);
1091 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
1092 packet->len, n->name, n->hostname);
1094 if(!n->status.reachable) {
1095 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
1096 n->name, n->hostname);
1101 n->out_bytes += packet->len;
1103 if(n->status.sptps) {
1104 send_sptps_packet(n, packet);
1108 via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
1111 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
1112 n->name, via->name, n->via->hostname);
1114 if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
1115 if(!send_tcppacket(via->connection, packet))
1116 terminate_connection(via->connection, true);
1118 send_udppacket(via, packet);
1121 /* Try to improve the tunnel.
1122 Note that we do this *after* we send the packet because sending actual packets takes priority
1123 with regard to the send buffer space and latency. */
1127 /* Broadcast a packet using the minimum spanning tree */
/* from   - node the broadcast originated from
   packet - packet to distribute
   A copy is delivered locally via send_packet(myself, ...). Nothing is
   forwarded in tunnelserver mode or when broadcast_mode is BMODE_NONE.
   Otherwise, depending on broadcast_mode, the packet is sent either to every
   connection that has an edge and is part of the MST (except the connection
   towards from's next hop), or to every reachable node other than myself that
   is reached directly.
   NOTE(review): the case labels, the condition around the local copy and the
   early returns are on lines not visible in this excerpt. */
1129 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1130 // Always give ourself a copy of the packet.
1132 send_packet(myself, packet);
1134 // In TunnelServer mode, do not forward broadcast packets.
1135 // The MST might not be valid and create loops.
1136 if(tunnelserver || broadcast_mode == BMODE_NONE)
1139 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1140 packet->len, from->name, from->hostname);
1142 switch(broadcast_mode) {
1143 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1144 // This guarantees all nodes receive the broadcast packet, and
1145 // usually distributes the sending of broadcast packets over all nodes.
1147 for list_each(connection_t, c, connection_list)
1148 if(c->edge && c->status.mst && c != from->nexthop->connection)
1149 send_packet(c->node, packet);
1152 // In direct mode, we send copies to each node we know of.
1153 // However, this only reaches nodes that can be reached in a single hop.
1154 // We don't have enough information to forward broadcast packets in this case.
1159 for splay_each(node_t, n, node_tree)
1160 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1161 send_packet(n, packet);
/* Try to identify the sender of pkt, received from address "from", by testing
   the packet against known nodes.
   Edges in edge_weight_tree are walked; edges whose target is unreachable or
   is myself are not candidates. sockaddrcmp_noport() compares the source
   address with the edge address, and try_mac() checks whether the packet
   authenticates for the edge's target node. last_hard_try records the second
   (now.tv_sec) of the last expensive attempt so it can be rate-limited.
   NOTE(review): the continue/break statements, the local result variables and
   the return statement are on lines not visible in this excerpt, so the exact
   matching order and the return value for "no match" are not documented here. */
1169 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1172 static time_t last_hard_try = 0;
1174 for splay_each(edge_t, e, edge_weight_tree) {
1175 if(!e->to->status.reachable || e->to == myself)
1178 if(sockaddrcmp_noport(from, &e->address)) {
1179 if(last_hard_try == now.tv_sec)
1184 if(!try_mac(e->to, pkt))
1192 last_hard_try = now.tv_sec;
1194 last_hard_try = now.tv_sec;
/* I/O callback for a UDP listening socket.
   data  - the listen_socket_t the datagram arrived on
   flags - not used on the visible lines
   One datagram of at most MAXSIZE bytes is read with recvfrom(). The sender is
   identified by, in order: lookup_node_udp() on the source address, the source
   node ID carried in the packet (accepted when the destination ID is null, the
   node uses SPTPS and sptps_verify_datagram() succeeds), and finally
   try_harder(). Unknown senders are logged at DEBUG_PROTOCOL level.
   For SPTPS senders the two leading node IDs are examined: a null destination
   ID marks a direct packet, otherwise source and destination nodes are looked
   up and the payload may be forwarded with send_sptps_data_priv().
   The packet is then processed by receive_udppacket(); afterwards n->sock is
   set to the index of the receiving socket, and for direct packets
   update_node_udp() is called when sockaddrcmp() returns non-zero for the
   source address versus n->address (presumably meaning they differ -- confirm).
   NOTE(review): declarations of pkt, from, to and hostname, the assignment of
   pkt.len, the returns and several branch conditions are on lines not visible
   in this excerpt. */
1198 void handle_incoming_vpn_data(void *data, int flags) {
1199 listen_socket_t *ls = data;
1202 node_id_t nullid = {};
1203 sockaddr_t addr = {};
1204 socklen_t addrlen = sizeof addr;
1206 bool direct = false;
1209 int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1211 if(len <= 0 || len > MAXSIZE) {
1212 if(!sockwouldblock(sockerrno))
1213 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1219 sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1221 // Try to figure out who sent this packet.
1223 node_t *n = lookup_node_udp(&addr);
1226 // It might be from a 1.1 node, which might have a source ID in the packet.
1227 pkt.offset = 2 * sizeof(node_id_t);
1228 from = lookup_node_id(SRCID(&pkt));
1229 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1230 if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1239 n = try_harder(&addr, &pkt);
1244 if(debug_level >= DEBUG_PROTOCOL) {
1245 hostname = sockaddr2hostname(&addr);
1246 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1252 if(n->status.sptps) {
1253 pkt.offset = 2 * sizeof(node_id_t);
1255 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1260 from = lookup_node_id(SRCID(&pkt));
1261 to = lookup_node_id(DSTID(&pkt));
1264 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1269 send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1278 if(!receive_udppacket(from, &pkt))
1281 n->sock = ls - listen_socket;
1282 if(direct && sockaddrcmp(&addr, &n->address))
1283 update_node_udp(n, &addr);
/* I/O callback for the local VPN device.
   data, flags - not used on the visible lines
   Reads one packet from the device through devops.read() into a local packet
   with the default offset and priority 0. On success the packet is counted in
   myself->in_packets and myself->in_bytes and handed to route() with myself as
   the source.
   NOTE(review): what happens when devops.read() fails is not visible in this
   excerpt. */
1286 void handle_device_data(void *data, int flags) {
1287 vpn_packet_t packet;
1288 packet.offset = DEFAULT_PACKET_OFFSET;
1289 packet.priority = 0;
1291 if(devops.read(&packet)) {
1292 myself->in_packets++;
1293 myself->in_bytes += packet.len;
1294 route(myself, &packet);