2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 1998-2005 Ivo Timmermans,
4 2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5 2010 Timothy Redaelli <timothy@redaelli.eu>
6 2010 Brandon Black <blblack@gmail.com>
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 #include "connection.h"
52 #define MAX(a, b) ((a) > (b) ? (a) : (b))
55 /* The minimum size of a probe is 14 bytes, but since we normally use CBC mode
56 encryption, we can add a few extra random bytes without increasing the
57 resulting packet size. */
58 #define MIN_PROBE_SIZE 18
62 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
65 static void send_udppacket(node_t *, vpn_packet_t *);
67 unsigned replaywin = 16;
68 bool localdiscovery = true;
69 bool udp_discovery = true;
70 int udp_discovery_keepalive_interval = 9;
71 int udp_discovery_interval = 2;
72 int udp_discovery_timeout = 30;
74 #define MAX_SEQNO 1073741824
/* Decide whether path MTU discovery for node n can stop.
   The visible condition fires once 20 probes have been sent or minmtu has
   caught up with (or overtaken) maxmtu; the two bounds are then reconciled
   and the resulting n->mtu is logged together with the probe count. */
76 static void try_fix_mtu(node_t *n) {
80 if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
// A minmtu above maxmtu is inconsistent, so it is pulled back down.
81 if(n->minmtu > n->maxmtu)
82 n->minmtu = n->maxmtu;
84 n->maxmtu = n->minmtu;
86 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
/* Timer callback for a node's UDP ping timeout. udp_probe_h() arms this
   timer with timeout_add() and passes the node as the callback argument.
   The handler looks at the node's udp_confirmed flag, logs that the last
   UDP ping response is too old, and clears udp_confirmed. */
91 static void udp_probe_timeout_handler(void *data) {
93 if(!n->status.udp_confirmed)
96 logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
97 n->status.udp_confirmed = false;
/* Handle a UDP probe packet received from node n.
   n:      the peer the probe came from.
   packet: the probe; its first data byte distinguishes a request (0) from
           a reply (non-zero, with 2 denoting the extended "type 2" reply).
   len:    probe length supplied by the caller; it is echoed in type 2
           replies and used as the probe length for other replies.
   Requests are answered by sending the packet back with send_udppacket().
   Replies confirm UDP connectivity, re-arm the UDP ping timeout, feed the
   minmtu/maxmtu estimates and update the RTT measurement. */
103 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
104 if(!DATA(packet)[0]) {
105 /* It's a probe request, send back a reply */
107 if(!n->status.sptps && !n->status.validkey) {
108 // But not if we don't have his key.
109 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request from %s (%s) but we don't have his key yet", n->name, n->hostname);
113 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
115 /* Type 2 probe replies were introduced in protocol 17.3 */
// The peer's minor protocol version is read from the top byte of its options.
116 if ((n->options >> 24) >= 3) {
117 uint8_t *data = DATA(packet);
// Echo the received length in network byte order...
119 uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
// ...followed by the current time as seconds and microseconds, also in network byte order.
121 gettimeofday(&now, NULL);
122 uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
123 uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
124 packet->len = MIN_PROBE_SIZE;
126 /* Legacy protocol: n won't understand type 2 probe replies. */
130 /* Temporarily set udp_confirmed, so that the reply is sent
131 back exactly the way it came in. */
133 bool udp_confirmed = n->status.udp_confirmed;
134 n->status.udp_confirmed = true;
135 send_udppacket(n, packet);
136 n->status.udp_confirmed = udp_confirmed;
// Reply handling: start from the caller-supplied length.
138 length_t probelen = len;
139 if (DATA(packet)[0] == 2) {
141 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
// Type 2 replies carry the original probe length in bytes 1-2 (network byte order).
143 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
146 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
148 /* It's a valid reply: now we know bidirectional communication
149 is possible using the address and socket that the reply
151 n->status.udp_confirmed = true;
// Restart the timer that will clear udp_confirmed again after udp_discovery_timeout seconds.
154 timeout_del(&n->udp_ping_timeout);
155 timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
// A reply to a probe longer than maxmtu means the path MTU has grown.
158 if(probelen >= n->maxmtu + 1) {
159 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
161 /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */
166 /* If applicable, raise the minimum supported MTU */
168 if(probelen > n->maxmtu)
169 probelen = n->maxmtu;
170 if(n->minmtu < probelen) {
171 n->minmtu = probelen;
176 The RTT is the time between the MTU probe burst was sent and the first
180 struct timeval now, diff;
181 gettimeofday(&now, NULL);
182 timersub(&now, &n->probe_time, &diff);
// Default to the local receive time; a type 2 reply overrides it below.
184 struct timeval probe_timestamp = now;
// Type 2 replies also carry the sender's timestamp in bytes 3-10.
185 if (DATA(packet)[0] == 2 && packet->len >= 11) {
186 uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
187 uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
188 probe_timestamp.tv_sec = ntohl(sec);
189 probe_timestamp.tv_usec = ntohl(usec);
194 if(n->probe_counter == 1) {
// diff is converted to seconds as a floating point value.
195 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
196 n->probe_time = probe_timestamp;
197 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
/* Compress len bytes from source into dest using the scheme selected by level.
   Visible branches: level 10 uses lzo1x_1_compress(), levels below 10 use
   zlib's compress2() with level as the compression level, and the remaining
   branch uses lzo1x_999_compress(); one branch simply copies the data.
   Each compressor is told that dest can hold MAXSIZE bytes.
   Callers in this file store the result as the compressed length and
   treat a negative value as failure. */
202 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
204 memcpy(dest, source, len);
206 } else if(level == 10) {
208 lzo_uint lzolen = MAXSIZE;
// Both LZO variants share the file-level lzo_wrkmem scratch buffer.
209 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
214 } else if(level < 10) {
216 unsigned long destlen = MAXSIZE;
217 if(compress2(dest, &destlen, source, len, level) == Z_OK)
224 lzo_uint lzolen = MAXSIZE;
225 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
/* Decompress len bytes from source into dest; level identifies the scheme
   that was used to compress them.
   Visible branches: a plain copy, lzo1x_decompress_safe() for levels above
   9, and zlib's uncompress() otherwise. Each decompressor is told that
   dest can hold MAXSIZE bytes.
   Callers in this file store the result as the decompressed length. */
235 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
237 memcpy(dest, source, len);
239 } else if(level > 9) {
241 lzo_uint lzolen = MAXSIZE;
// No work memory is passed for decompression (NULL).
242 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
250 unsigned long destlen = MAXSIZE;
251 if(uncompress(dest, &destlen, source, len) == Z_OK)
/* Common entry point for a fully decoded packet from node n; it is called
   from the UDP, TCP and SPTPS receive paths in this file.
   Logs the packet size and sender and adds the size to n->in_bytes. */
263 static void receive_packet(node_t *n, vpn_packet_t *packet) {
264 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
265 packet->len, n->name, n->hostname);
268 n->in_bytes += packet->len;
/* Test whether inpkt authenticates against node n's incoming key material.
   inpkt is const and is only inspected.
   The visible code either delegates to sptps_verify_datagram() on n's SPTPS
   state or, for the legacy protocol, requires an active digest and a packet
   at least as long as a sequence number plus digest, and then runs
   digest_verify() over everything except the trailing digest. */
273 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
275 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
277 #ifdef DISABLE_LEGACY
280 if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
// The digest occupies the last digest_length() bytes of the packet.
283 return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
/* Decode a UDP datagram inpkt that has been attributed to node n.
   For SPTPS peers the payload following two node IDs is handed to
   sptps_receive_data(). For legacy peers the visible steps are: length
   check, MAC verification, decryption, sequence number and replay window
   check, decompression, and finally dispatch to udp_probe_h() for probes
   or receive_packet() for data.
   pkt1 and pkt2 are scratch packets used as alternating outputs of the
   decryption and decompression stages. */
287 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
288 vpn_packet_t pkt1, pkt2;
289 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
292 pkt1.offset = DEFAULT_PACKET_OFFSET;
293 pkt2.offset = DEFAULT_PACKET_OFFSET;
295 if(n->status.sptps) {
296 if(!n->sptps.state) {
297 if(!n->status.waitingforkey) {
298 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
301 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
// Skip the two node IDs that precede the SPTPS payload.
305 inpkt->offset += 2 * sizeof(node_id_t);
306 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
307 logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
313 #ifdef DISABLE_LEGACY
316 if(!n->status.validkey_in) {
317 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
321 /* Check packet length */
323 if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
324 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
325 n->name, n->hostname);
329 /* It's a legacy UDP packet, the data starts after the seqno */
331 inpkt->offset += sizeof(seqno_t);
333 /* Check the message authentication code */
335 if(digest_active(n->indigest)) {
// Strip the digest from the length first; it sits directly after the remaining bytes.
336 inpkt->len -= digest_length(n->indigest);
337 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
338 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
342 /* Decrypt the packet */
344 if(cipher_active(n->incipher)) {
345 vpn_packet_t *outpkt = pkt[nextpkt++];
348 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
349 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
353 outpkt->len = outlen;
357 /* Check the sequence number */
// The sequence number is transmitted in network byte order.
360 memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
361 seqno = ntohl(seqno);
362 inpkt->len -= sizeof seqno;
// Anything other than the next expected sequence number needs replay window handling.
// n->late is used as a bitmap of replaywin bytes, i.e. replaywin * 8 sequence numbers.
365 if(seqno != n->received_seqno + 1) {
366 if(seqno >= n->received_seqno + replaywin * 8) {
367 if(n->farfuture++ < replaywin >> 2) {
368 logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
369 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
372 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
373 seqno - n->received_seqno - 1, n->name, n->hostname);
374 memset(n->late, 0, replaywin);
375 } else if (seqno <= n->received_seqno) {
376 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
377 logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
378 n->name, n->hostname, seqno, n->received_seqno);
// Mark every skipped sequence number as still outstanding ("late").
// NOTE(review): i is a signed int compared against seqno; confirm seqno's type to rule out a signed/unsigned mismatch.
382 for(int i = n->received_seqno + 1; i < seqno; i++)
383 n->late[(i / 8) % replaywin] |= 1 << i % 8;
// This sequence number has now been seen, so its bit is cleared.
388 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
391 if(seqno > n->received_seqno)
392 n->received_seqno = seqno;
396 if(n->received_seqno > MAX_SEQNO)
399 /* Decompress the packet */
401 length_t origlen = inpkt->len;
403 if(n->incompression) {
404 vpn_packet_t *outpkt = pkt[nextpkt++];
// NOTE(review): the "< 0" test applies to the value assigned to outpkt->len; if length_t is unsigned it can never be true - confirm.
406 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
407 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
408 n->name, n->hostname);
414 origlen -= MTU/64 + 20;
// Bytes 12 and 13 both zero identify a UDP probe rather than payload traffic.
419 if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
420 udp_probe_h(n, inpkt, origlen);
422 receive_packet(n, inpkt);
/* Handle a VPN packet that arrived over the meta connection c.
   buffer/len: the raw packet bytes.
   The length is checked against the space left in outpkt.data after the
   default offset, the bytes are copied into a local packet with priority
   -1, and that packet is passed to receive_packet() for c->node. */
427 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
429 outpkt.offset = DEFAULT_PACKET_OFFSET;
431 if(len > sizeof outpkt.data - outpkt.offset)
435 if(c->options & OPTION_TCPONLY)
// send_packet() treats priority -1 as "TCP is forced".
438 outpkt.priority = -1;
439 memcpy(DATA(&outpkt), buffer, len);
441 receive_packet(c->node, &outpkt);
/* Send origpkt to node n using SPTPS.
   Probes (bytes 12 and 13 both zero) are sent as PKT_PROBE records.
   Other packets may be compressed when n->outcompression is set, and are
   sent either as a plain TCP packet over a direct meta connection (when
   larger than n->minmtu) or as an SPTPS record starting at offset. */
444 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
445 if(!n->status.validkey && !n->connection)
451 if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
452 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
456 if(routing_mode == RMODE_ROUTER)
461 if(origpkt->len < offset)
466 if(n->outcompression) {
// Only the part after offset is compressed.
468 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
470 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// The compressed form is only used when it is actually smaller.
471 } else if(len < origpkt->len - offset) {
472 outpkt.len = len + offset;
474 type |= PKT_COMPRESSED;
478 /* If we have a direct metaconnection to n, and we can't use UDP, then
479 don't bother with SPTPS and just use a "plaintext" PACKET message.
480 We don't really care about end-to-end security since we're not
481 sending the message through any intermediate nodes. */
482 if(n->connection && origpkt->len > n->minmtu)
483 send_tcppacket(n->connection, origpkt);
485 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
/* Ensure the listening socket index *sock suits address sa.
   If the currently selected socket has a different address family than sa,
   the listening sockets are scanned for one whose family matches. */
489 static void adapt_socket(const sockaddr_t *sa, int *sock) {
490 /* Make sure we have a suitable socket for the chosen address */
491 if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
492 for(int i = 0; i < listen_sockets; i++) {
493 if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
/* Pick the address (*sa) and listening socket index (*sock) to use when
   sending UDP to node n.
   A confirmed UDP address takes precedence. Otherwise an edge of n is
   picked at random and the address of its reverse edge is used, together
   with a random listening socket; adapt_socket() then matches the socket
   to the address family. */
501 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
506 /* If the UDP address is confirmed, use it. */
507 if(n->status.udp_confirmed)
510 /* Send every third packet to n->address; that could be set
511 to the node's reflexive UDP address discovered during key
520 /* Otherwise, addresses are found in edges to this node.
521 So we pick a random edge and a random socket. */
// NOTE(review): rand() % count is a division by zero if edge_tree is empty; confirm callers guarantee at least one edge.
524 int j = rand() % n->edge_tree->count;
525 edge_t *candidate = NULL;
527 for splay_each(edge_t, e, n->edge_tree) {
529 candidate = e->reverse;
535 *sa = &candidate->address;
536 *sock = rand() % listen_sockets;
539 adapt_socket(*sa, sock);
/* Pick a local (LAN) address of node n to send UDP to.
   An edge of n is picked at random; if it has a local address with a
   non-zero address family, *sa is pointed at it, *sock is set to a random
   listening socket, and adapt_socket() matches the socket to the family. */
542 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
545 /* Pick one of the edges from this node at random, then use its local address. */
// NOTE(review): rand() % count is a division by zero if edge_tree is empty; confirm callers guarantee at least one edge.
548 int j = rand() % n->edge_tree->count;
549 edge_t *candidate = NULL;
551 for splay_each(edge_t, e, n->edge_tree) {
558 if (candidate && candidate->local_address.sa.sa_family) {
559 *sa = &candidate->local_address;
560 *sock = rand() % listen_sockets;
561 adapt_socket(*sa, sock);
/* Send origpkt to node n over UDP.
   SPTPS peers are handed to send_sptps_packet(). For legacy peers the
   packet falls back to TCP when no valid key is known or when it exceeds
   the discovered minimum MTU; otherwise it is compressed, given a sequence
   number, encrypted, authenticated and sent with sendto().
   A "message too large" error from sendto() lowers n->maxmtu and n->mtu
   below the original packet length.
   pkt1 and pkt2 are scratch packets used as alternating stage outputs.
   origpkt->len is set back to its original value near the end. */
565 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
566 vpn_packet_t pkt1, pkt2;
567 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
568 vpn_packet_t *inpkt = origpkt;
570 vpn_packet_t *outpkt;
571 int origlen = origpkt->len;
573 #if defined(SOL_IP) && defined(IP_TOS)
// Static: remembers the TOS value most recently applied, across calls.
574 static int priority = 0;
575 int origpriority = origpkt->priority;
578 pkt1.offset = DEFAULT_PACKET_OFFSET;
579 pkt2.offset = DEFAULT_PACKET_OFFSET;
581 if(!n->status.reachable) {
582 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
587 return send_sptps_packet(n, origpkt);
589 #ifdef DISABLE_LEGACY
592 /* Make sure we have a valid key */
594 if(!n->status.validkey) {
595 logger(DEBUG_TRAFFIC, LOG_INFO,
596 "No valid key known yet for %s (%s), forwarding via TCP",
597 n->name, n->hostname);
598 send_tcppacket(n->nexthop->connection, origpkt);
// Probes (bytes 12 and 13 both zero) are exempt from the MTU-based fallback.
602 if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
603 logger(DEBUG_TRAFFIC, LOG_INFO,
604 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
605 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
608 send_packet(n->nexthop, origpkt);
610 send_tcppacket(n->nexthop->connection, origpkt);
615 /* Compress the packet */
617 if(n->outcompression) {
618 outpkt = pkt[nextpkt++];
// NOTE(review): the "< 0" test applies to the value assigned to outpkt->len; if length_t is unsigned it can never be true - confirm.
620 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
621 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
622 n->name, n->hostname);
629 /* Add sequence number */
// The counter is incremented before use and stored in network byte order.
631 seqno_t seqno = htonl(++(n->sent_seqno));
632 memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
633 inpkt->len += sizeof seqno;
635 /* Encrypt the packet */
637 if(cipher_active(n->outcipher)) {
638 outpkt = pkt[nextpkt++];
641 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
642 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
646 outpkt->len = outlen;
650 /* Add the message authentication code */
652 if(digest_active(n->outdigest)) {
// The digest is written directly after the current end of the packet.
653 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
// NOTE(review): this message says "encrypting" although the failing step is digest creation - confirm wording.
654 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
658 inpkt->len += digest_length(n->outdigest);
661 /* Send the packet */
663 const sockaddr_t *sa = NULL;
666 if(n->status.send_locally)
667 choose_local_address(n, &sa, &sock);
669 choose_udp_address(n, &sa, &sock);
671 #if defined(SOL_IP) && defined(IP_TOS)
// NOTE(review): this block indexes listen_socket with n->sock while the sendto() below uses the freshly chosen sock; confirm which socket the TOS should be set on.
672 if(priorityinheritance && origpriority != priority
673 && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
674 priority = origpriority;
675 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
676 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
677 logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
// "Would block" errors are ignored; other failures are examined below.
681 if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
682 if(sockmsgsize(sockerrno)) {
// The system rejected this size, so the MTU bounds must be below origlen.
683 if(n->maxmtu >= origlen)
684 n->maxmtu = origlen - 1;
685 if(n->mtu >= origlen)
686 n->mtu = origlen - 1;
689 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
693 origpkt->len = origlen;
/* Deliver SPTPS data from node `from` to node `to`, choosing between the
   TCP meta connection and UDP.
   type:     SPTPS record type (SPTPS_HANDSHAKE, PKT_PROBE, ...).
   data/len: the bytes to deliver.
   The relay is to->via when that is not ourselves and the payload fits its
   minmtu (or is a probe); otherwise it is to->nexthop.
   TCP path: the data is base64-encoded and sent as an ANS_KEY or REQ_KEY
   request over to->nexthop's connection.
   UDP path: for relays that support it, a destination ID (or an all-zero
   ID for direct packets) and the source ID are prepended, and the buffer
   is sent with sendto(). A "message too large" error lowers the relay's
   maxmtu and mtu. */
697 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
698 node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
// direct: we are the originator and the relay is the final destination.
699 bool direct = from == myself && to == relay;
// The relay's minor protocol version is read from the top byte of its options.
700 bool relay_supported = (relay->options >> 24) >= 4;
701 bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
703 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
704 TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
705 This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
707 if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
// Buffer sized for the base64 expansion of len bytes.
708 char buf[len * 4 / 3 + 5];
709 b64encode(data, buf, len);
710 /* If no valid key is known yet, send the packets using ANS_KEY requests,
711 to ensure we get to learn the reflexive UDP address. */
712 if(from == myself && !to->status.validkey) {
713 to->incompression = myself->incompression;
714 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
716 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
// UDP path: leave room for the two node IDs when the relay understands them.
721 if(relay_supported) overhead += sizeof to->id + sizeof from->id;
722 char buf[len + overhead]; char* buf_ptr = buf;
723 if(relay_supported) {
725 /* Inform the recipient that this packet was sent directly. */
726 node_id_t nullid = {};
727 memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
729 memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
731 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
734 /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
735 memcpy(buf_ptr, data, len); buf_ptr += len;
737 const sockaddr_t *sa = NULL;
739 if(relay->status.send_locally)
740 choose_local_address(relay, &sa, &sock);
742 choose_udp_address(relay, &sa, &sock);
743 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
// "Would block" errors are ignored; other failures are examined below.
744 if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
745 if(sockmsgsize(sockerrno)) {
746 // Compensate for SPTPS overhead
747 len -= SPTPS_DATAGRAM_OVERHEAD;
748 if(relay->maxmtu >= len)
749 relay->maxmtu = len - 1;
750 if(relay->mtu >= len)
751 relay->mtu = len - 1;
754 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
/* Send SPTPS data originating from this node.
   handle is passed on as the destination node and myself as the source;
   type, data and len are forwarded unchanged to send_sptps_data_priv(),
   whose result is returned. */
762 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
763 return send_sptps_data_priv(handle, myself, type, data, len);
/* Handle one SPTPS record received from a peer.
   handle:   the sending node.
   type:     record type (SPTPS_HANDSHAKE, PKT_PROBE, or a combination of
             the PKT_COMPRESSED and PKT_MAC flags).
   data/len: the record payload.
   A handshake record marks the key exchange as complete (validkey set,
   waitingforkey cleared). Probe records are copied into a packet and
   passed to udp_probe_h(). Data records are checked for unexpected type
   bits and for a MAC header matching the routing mode, decompressed when
   PKT_COMPRESSED is set, given an Ethernet type (0x0800 or 0x86DD) based
   on the IP version nibble, and delivered through receive_packet(). */
766 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
767 node_t *from = handle;
769 if(type == SPTPS_HANDSHAKE) {
770 if(!from->status.validkey) {
771 from->status.validkey = true;
772 from->status.waitingforkey = false;
773 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
779 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
784 inpkt.offset = DEFAULT_PACKET_OFFSET;
786 if(type == PKT_PROBE) {
788 memcpy(DATA(&inpkt), data, len);
789 udp_probe_h(from, &inpkt, len);
// Any bit other than PKT_COMPRESSED and PKT_MAC is unexpected here.
793 if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
794 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
798 /* Check if we have the headers we need */
799 if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
800 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
802 } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
803 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
// Without a MAC header the payload is placed after the first 14 bytes of the packet.
806 int offset = (type & PKT_MAC) ? 0 : 14;
807 if(type & PKT_COMPRESSED) {
808 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
812 inpkt.len = ulen + offset;
814 if(inpkt.len > MAXSIZE)
817 memcpy(DATA(&inpkt) + offset, data, len);
818 inpkt.len = len + offset;
821 /* Generate the Ethernet packet type if necessary */
// The high nibble of byte 14 is the IP version.
823 switch(DATA(&inpkt)[14] >> 4) {
825 DATA(&inpkt)[12] = 0x08;
826 DATA(&inpkt)[13] = 0x00;
829 DATA(&inpkt)[12] = 0x86;
830 DATA(&inpkt)[13] = 0xDD;
833 logger(DEBUG_TRAFFIC, LOG_ERR,
834 "Unknown IP version %d while reading packet from %s (%s)",
835 DATA(&inpkt)[14] >> 4, from->name, from->hostname);
840 receive_packet(from, &inpkt);
844 // This function tries to get SPTPS keys, if they aren't already known.
845 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
// The node's validkey and waitingforkey flags drive the decision. When a key
// request has been outstanding for more than 10 seconds, the SPTPS session is
// stopped and waitingforkey is cleared.
846 static void try_sptps(node_t *n) {
847 if(n->status.validkey)
850 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
852 if(!n->status.waitingforkey)
// last_req_key is compared against the current time in whole seconds.
854 else if(n->last_req_key + 10 < now.tv_sec) {
855 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
856 sptps_stop(&n->sptps);
857 n->status.waitingforkey = false;
/* Build a UDP probe of len bytes and send it to node n.
   The first 14 bytes are zeroed, which leaves bytes 12 and 13 at zero (the
   marker the receive and send paths use to recognise probes), and the
   remaining len - 14 bytes are filled with random data.
   NOTE(review): len is assumed to be at least 14; confirm callers never
   pass less. */
864 static void send_udp_probe_packet(node_t *n, int len) {
866 packet.offset = DEFAULT_PACKET_OFFSET;
867 memset(DATA(&packet), 0, 14);
868 randomize(DATA(&packet) + 14, len - 14);
872 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
874 send_udppacket(n, &packet);
877 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
878 // If a tunnel is already established, it makes sure it stays up.
879 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
// A probe of MIN_PROBE_SIZE bytes is sent once at least `interval` seconds have
// passed since the previous one: udp_discovery_keepalive_interval when UDP is
// confirmed, udp_discovery_interval otherwise. With local discovery enabled,
// UDP unconfirmed and a previous edge known, another probe is sent with the
// send_locally flag temporarily set.
880 static void try_udp(node_t* n) {
884 struct timeval ping_tx_elapsed;
885 timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
887 int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
// Only whole seconds are compared.
889 if(ping_tx_elapsed.tv_sec >= interval) {
890 send_udp_probe_packet(n, MIN_PROBE_SIZE);
891 n->udp_ping_sent = now;
893 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
// send_udppacket() consults send_locally to pick choose_local_address().
894 n->status.send_locally = true;
895 send_udp_probe_packet(n, MIN_PROBE_SIZE);
896 n->status.send_locally = false;
/* Estimate an initial maxmtu for node n from what the operating system
   reports.
   A temporary UDP socket is connected to the address selected by
   choose_udp_address() and queried with getsockopt(IP_MTU). The IP header
   size and tinc's own per-packet overhead are then subtracted: SPTPS
   overhead plus two node IDs for newer SPTPS peers, or the digest, cipher
   block padding and sequence number for the legacy protocol.
   Failures of socket(), connect() and getsockopt() are logged. */
901 static length_t choose_initial_maxmtu(node_t *n) {
906 const sockaddr_t *sa = NULL;
908 choose_udp_address(n, &sa, &sockindex);
912 sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
914 logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
// connect() on a UDP socket sends nothing; it fixes the destination so IP_MTU can be queried.
918 if(connect(sock, &sa->sa, SALEN(sa->sa))) {
919 logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
925 socklen_t ip_mtu_len = sizeof ip_mtu;
926 if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
927 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
934 /* getsockopt(IP_MTU) returns the MTU of the physical interface.
935 We need to remove various overheads to get to the tinc MTU. */
936 length_t mtu = ip_mtu;
937 mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip);
939 if(n->status.sptps) {
940 mtu -= SPTPS_DATAGRAM_OVERHEAD;
// Peers with minor protocol version 4 or later also get two node IDs per datagram.
941 if((n->options >> 24) >= 4)
942 mtu -= sizeof(node_id_t) + sizeof(node_id_t);
944 mtu -= digest_length(n->outdigest);
946 /* Now it's tricky. We use CBC mode, so the length of the
947 encrypted payload must be a multiple of the blocksize. The
948 sequence number is also part of the encrypted payload, so we
949 must account for it after correcting for the blocksize.
950 Furthermore, the padding in the last block must be at least
953 length_t blocksize = cipher_blocksize(n->outcipher);
965 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
971 logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
981 /* This function tries to determine the MTU of a node.
982 By calling this function repeatedly, n->minmtu will be progressively
983 increased, and at some point, n->mtu will be fixed to n->minmtu. If the MTU
984 is already fixed, this function checks if it can be increased.
987 static void try_mtu(node_t *n) {
988 if(!(n->options & OPTION_PMTU_DISCOVERY))
991 if(udp_discovery && !n->status.udp_confirmed) {
998 /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
999 mtuprobes == 20: fix MTU, and go to -1
1000 mtuprobes == -1: send one >maxmtu probe every pingtimeout */
// Time since the previous probe burst; used for rate limiting below.
1002 struct timeval elapsed;
1003 timersub(&now, &n->probe_sent_time, &elapsed);
1004 if(n->mtuprobes >= 0) {
1005 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
1008 if(elapsed.tv_sec < pingtimeout)
1014 if(n->mtuprobes < 0) {
1015 /* After the initial discovery, we only send one maxmtu and one
1016 maxmtu+1 probe to detect PMTU increases. */
1017 send_udp_probe_packet(n, n->maxmtu);
1018 if(n->maxmtu + 1 < MTU)
1019 send_udp_probe_packet(n, n->maxmtu + 1);
1021 /* Before initial discovery begins, set maxmtu to the most likely value.
1022 If it's underestimated, we will correct it after initial discovery. */
1023 if(n->mtuprobes == 0)
1024 n->maxmtu = choose_initial_maxmtu(n);
1027 /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
1028 but it will typically increase convergence time in the no-loss case. */
1029 const length_t probes_per_cycle = 8;
1031 /* This magic value was determined using math simulations.
1032 It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
1033 Since 1407 is just below the range of tinc MTUs over typical networks,
1034 this fine-tuning allows tinc to cover a lot of ground very quickly.
1035 This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
1036 then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
1037 if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
1038 const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
// cycle_position counts down from probes_per_cycle - 1 to 0 within each cycle.
1040 const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
// Probing never starts below 512 bytes.
1041 const length_t minmtu = MAX(n->minmtu, 512);
1042 const float interval = n->maxmtu - minmtu;
1044 /* The core of the discovery algorithm is this exponential.
1045 It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
1046 This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
1047 are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
1048 on the precise MTU as we are approaching it.
1049 The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one
1050 reply per cycle so that we can make progress. */
1051 const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
// Remember maxmtu so a change made by the send path can be detected.
1053 length_t maxmtu = n->maxmtu;
1054 send_udp_probe_packet(n, minmtu + offset);
1055 /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
1056 In that case, we recalculate with the new maxmtu and try again. */
1057 if(n->mtuprobes < 0 || maxmtu == n->maxmtu)
1061 if(n->mtuprobes >= 0)
1065 n->probe_counter = 0;
1066 n->probe_sent_time = now;
1067 n->probe_time = now;
1069 /* Calculate the packet loss of incoming traffic by comparing the rate of
1070 packets received to the rate with which the sequence number has increased.
1071 TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
1074 if(n->received > n->prev_received)
1075 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
1077 n->packetloss = n->received_seqno <= n->prev_received_seqno;
1079 n->prev_received_seqno = n->received_seqno;
1080 n->prev_received = n->received;
// try_tx_sptps(n, mtu): SPTPS variant of the tunnel improvement logic.
// n is the target node; mtu is passed on unchanged when the function
// re-invokes itself for the relay node.
1083 /* These functions try to establish a tunnel to a node (or its relay) so that
1084 packets can be sent (e.g. exchange keys).
1085 If a tunnel is already established, it tries to improve it (e.g. by trying
1086 to establish a UDP tunnel instead of TCP). This function makes no
1087 guarantees - it is up to the caller to check the node's state to figure out
1088 if TCP and/or UDP is usable. By calling this function repeatedly, the
1089 tunnel is gradually improved until we hit the wall imposed by the underlying
1090 network environment. It is recommended to call this function every time a
1091 packet is sent (or intended to be sent) to a node, so that the tunnel keeps
1092 improving as packets flow, and then gracefully downgrades itself as it goes
1096 static void try_tx_sptps(node_t *n, bool mtu) {
1097 /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
1098 messages anyway, so there's no need for SPTPS at all. */
1100 if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))
1103 /* Otherwise, try to do SPTPS authentication with n if necessary. */
1107 /* Do we need to relay packets? */
// When n->via is ourselves, the next hop is considered instead.
1109 node_t *via = (n->via == myself) ? n->nexthop : n->via;
1111 /* If the relay doesn't support SPTPS, everything goes via TCP anyway. */
1113 if((via->options >> 24) < 4)
1116 /* If we do have a relay, try everything with that one instead. */
1119 return try_tx_sptps(via, mtu);
/* Legacy protocol variant of the tunnel improvement logic for node n.
   It looks at whether the peer has our key (validkey_in) and whether we
   have the peer's key (validkey). When we lack the peer's key, the
   timestamp of the last key request is refreshed at most once every 10
   seconds. */
1126 static void try_tx_legacy(node_t *n, bool mtu) {
1127 /* Does he have our key? If not, send one. */
1129 if(!n->status.validkey_in)
1132 /* Check if we already have a key, or request one. */
1134 if(!n->status.validkey) {
// Rate limit: at least 10 seconds between updates of last_req_key.
1135 if(n->last_req_key + 10 <= now.tv_sec) {
1137 n->last_req_key = now.tv_sec;
/* Public entry point for tunnel improvement: forwards n and mtu to
   try_tx_sptps() or try_tx_legacy(). */
1147 void try_tx(node_t *n, bool mtu) {
1149 try_tx_sptps(n, mtu);
1151 try_tx_legacy(n, mtu);
/* Send packet to node n, selecting the transport.
   Packets for ourselves are written to the tun/tap device through
   devops.write(). For other nodes, reachability is checked and logged,
   n->out_bytes is updated, and the packet is sent either through
   send_sptps_packet() (SPTPS peers), over TCP (priority -1 or TCPOnly),
   or over UDP with send_udppacket(). The SPTPS and UDP sends are followed
   by a try_tx_*() call. */
1154 void send_packet(node_t *n, vpn_packet_t *packet) {
1155 // If it's for myself, write it to the tun/tap device.
// The destination MAC (first ETH_ALEN bytes) is overwritten with our own.
1159 memcpy(DATA(packet), mymac.x, ETH_ALEN);
1161 n->out_bytes += packet->len;
1162 devops.write(packet);
// NOTE(review): a routine trace message is logged with LOG_ERR priority; LOG_INFO or LOG_DEBUG looks more appropriate - confirm.
1166 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", packet->len, n->name, n->hostname);
1168 // If the node is not reachable, drop it.
1170 if(!n->status.reachable) {
1171 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", n->name, n->hostname);
1175 // Keep track of packet statistics.
1178 n->out_bytes += packet->len;
1180 // Check if it should be sent as an SPTPS packet.
1182 if(n->status.sptps) {
1183 send_sptps_packet(n, packet);
1184 try_tx_sptps(n, true);
1188 // Determine which node to actually send it to.
// Forced-TCP packets (priority -1) always go to the next hop.
1190 node_t *via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
// NOTE(review): the name printed is via's but the hostname is n->via's; these differ when via is n->nexthop - confirm intended.
1193 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", n->name, via->name, n->via->hostname);
1195 // Try to send via UDP, unless TCP is forced.
1197 if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
// A failed TCP send tears down the meta connection.
1198 if(!send_tcppacket(via->connection, packet))
1199 terminate_connection(via->connection, true);
1203 send_udppacket(via, packet);
1204 try_tx_legacy(via, true);
// Deliver a broadcast packet locally and, depending on broadcast_mode,
// forward copies to other nodes.
//   from   - the node the packet originated from (its name/hostname are
//            logged and its nexthop connection is excluded in MST mode).
//   packet - the packet to broadcast.
// NOTE(review): the case labels of the switch and some guarding conditions
// are not visible in this excerpt; the comments below describe only the
// visible statements.
1207 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1208 // Always give ourself a copy of the packet.
1210 send_packet(myself, packet);
1212 // In TunnelServer mode, do not forward broadcast packets.
1213 // The MST might not be valid and create loops.
1214 if(tunnelserver || broadcast_mode == BMODE_NONE)
1217 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1218 packet->len, from->name, from->hostname);
1220 switch(broadcast_mode) {
1221 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1222 // This guarantees all nodes receive the broadcast packet, and
1223 // usually distributes the sending of broadcast packets over all nodes.
// Walk every connection that has an edge and is flagged as part of the MST,
// skipping the connection towards the originating node's next hop.
1225 for list_each(connection_t, c, connection_list)
1226 if(c->edge && c->status.mst && c != from->nexthop->connection)
1227 send_packet(c->node, packet);
1230 // In direct mode, we send copies to each node we know of.
1231 // However, this only reaches nodes that can be reached in a single hop.
1232 // We don't have enough information to forward broadcast packets in this case.
// A node qualifies when it is reachable, is not ourselves, and either
// (via == myself and it is its own next hop) or it is its own via.
1237 for splay_each(node_t, n, node_tree)
1238 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1239 send_packet(n, packet);
/* Try to identify the sender of a UDP packet whose source address did not
   match a known node (it is called from handle_incoming_vpn_data() for that
   case).
   from - source address of the received datagram.
   pkt  - the received packet, handed to try_mac() for each candidate.
   Returns a node_t pointer; the statements that choose the return value are
   not visible in this excerpt. */
1247 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
/* Function-static timestamp, compared against now.tv_sec below so that the
   guarded path is skipped when it was already taken in the current second. */
1250 static time_t last_hard_try = 0;
/* Candidates are the target nodes of all edges in edge_weight_tree. */
1252 for splay_each(edge_t, e, edge_weight_tree) {
/* Ignore edges to unreachable nodes and edges that point at ourselves. */
1253 if(!e->to->status.reachable || e->to == myself)
/* presumably sockaddrcmp_noport() is non-zero when the addresses differ
   (ignoring the port), so this branch is the "address does not match the
   edge" case -- TODO confirm against its definition. */
1256 if(sockaddrcmp_noport(from, &e->address)) {
1257 if(last_hard_try == now.tv_sec)
/* try_mac() is defined elsewhere; presumably it checks whether pkt
   authenticates with that node's key -- TODO confirm. */
1262 if(!try_mac(e->to, pkt))
/* NOTE(review): last_hard_try is assigned on two consecutive visible lines.
   If the first assignment is conditional (its guard is not visible here), the
   second, unconditional one makes that condition irrelevant -- confirm which
   behaviour is intended. */
1270 last_hard_try = now.tv_sec;
1272 last_hard_try = now.tv_sec;
// Handler for data arriving on a UDP listening socket.
//   data  - the listen_socket_t the datagram arrived on.
//   flags - not referenced on the lines visible in this excerpt.
// Receives one datagram, works out which node sent it, relays SPTPS packets
// that are addressed to another node, and otherwise hands the packet to
// receive_udppacket(). Several declarations and branch conditions are not
// visible in this excerpt; comments describe only what is shown.
1276 void handle_incoming_vpn_data(void *data, int flags) {
1277 listen_socket_t *ls = data;
// All-zero node ID, used below to recognise an empty destination ID.
1280 node_id_t nullid = {};
1281 sockaddr_t addr = {};
1282 socklen_t addrlen = sizeof addr;
// Set further down (not visible here); gates the address update at the end.
1284 bool direct = false;
// Read at most MAXSIZE bytes into the packet data and record the sender in addr.
1287 int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
// Only log failures that are not plain "would block" conditions.
1289 if(len <= 0 || len > MAXSIZE) {
1290 if(!sockwouldblock(sockerrno))
1291 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1297 sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1299 // Try to figure out who sent this packet.
// First attempt: look the sender up by its UDP source address.
1301 node_t *n = lookup_node_udp(&addr);
1304 // It might be from a 1.1 node, which might have a source ID in the packet.
// Skip the two node IDs (source and destination) at the front of the packet.
1305 pkt.offset = 2 * sizeof(node_id_t);
1306 from = lookup_node_id(SRCID(&pkt));
// Accept the source ID only if the destination ID is all zeroes and the
// claimed sender uses SPTPS.
1307 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
// NOTE(review): sizeof yields size_t, so this subtraction is unsigned; if
// pkt.len can be smaller than 2 * sizeof(node_id_t) the result wraps to a
// very large length. No minimum-length check is visible in this excerpt --
// confirm one exists before this point.
1308 if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
// Fallback identification, see try_harder().
1317 n = try_harder(&addr, &pkt);
// The hostname string is only built when the message will actually be logged.
1322 if(debug_level >= DEBUG_PROTOCOL) {
1323 hostname = sockaddr2hostname(&addr);
1324 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1330 if(n->status.sptps) {
1331 pkt.offset = 2 * sizeof(node_id_t);
// An all-zero destination ID is handled separately from the lookup below.
1333 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
// Otherwise resolve both endpoints from the IDs carried in the packet.
1338 from = lookup_node_id(SRCID(&pkt));
1339 to = lookup_node_id(DSTID(&pkt));
1342 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
// Relay the payload (without the two IDs) towards its destination.
// NOTE(review): same unsigned-subtraction concern as above.
1347 send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1356 if(!receive_udppacket(from, &pkt))
// Remember which listening socket this node was heard on, as an index
// relative to listen_socket.
1359 n->sock = ls - listen_socket;
// For directly received packets, refresh the node's UDP address when
// sockaddrcmp() reports a non-zero result against the stored address
// (presumably meaning "different" -- TODO confirm).
1360 if(direct && sockaddrcmp(&addr, &n->address))
1361 update_node_udp(n, &addr);
// Handler for data arriving from the local device.
//   data, flags - not referenced on the lines visible in this excerpt.
// Reads one packet through devops.read(); on success it is counted as
// incoming traffic on myself and passed to route(). What happens when the
// read fails is not visible in this excerpt.
1364 void handle_device_data(void *data, int flags) {
1365 vpn_packet_t packet;
// Initialise the fields that are set before the read: data offset and priority.
1366 packet.offset = DEFAULT_PACKET_OFFSET;
1367 packet.priority = 0;
1369 if(devops.read(&packet)) {
1370 myself->in_packets++;
1371 myself->in_bytes += packet.len;
// Route the packet with ourselves as its source.
1372 route(myself, &packet);