c3e8c6246206ed5226eef5dd0de2d8ba3e68944d
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2013 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
49 int keylifetime = 0;
50 #ifdef HAVE_LZO
51 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
52 #endif
53
54 static void send_udppacket(node_t *, vpn_packet_t *);
55
56 unsigned replaywin = 16;
57 bool localdiscovery = true;
58
59 #define MAX_SEQNO 1073741824
60
61 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
62    mtuprobes ==    31: sleep pinginterval seconds
63    mtuprobes ==    32: send 1 burst, sleep pingtimeout second
64    mtuprobes ==    33: no response from other side, restart PMTU discovery process
65
66    Probes are sent in batches of at least three, with random sizes between the
67    lower and upper boundaries for the MTU thus far discovered.
68
69    After the initial discovery, a fourth packet is added to each batch with a
70    size larger than the currently known PMTU, to test if the PMTU has increased.
71
72    In case local discovery is enabled, another packet is added to each batch,
73    which will be broadcast to the local network.
74
75 */
76
77 static void send_mtu_probe_handler(void *data) {
78         node_t *n = data;
79         int timeout = 1;
80
81         n->mtuprobes++;
82
83         if(!n->status.reachable || !n->status.validkey) {
84                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
85                 n->mtuprobes = 0;
86                 return;
87         }
88
89         if(n->mtuprobes > 32) {
90                 if(!n->minmtu) {
91                         n->mtuprobes = 31;
92                         timeout = pinginterval;
93                         goto end;
94                 }
95
96                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
97                 n->status.udp_confirmed = false;
98                 n->mtuprobes = 1;
99                 n->minmtu = 0;
100                 n->maxmtu = MTU;
101         }
102
103         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
104                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
105                 n->mtuprobes = 31;
106         }
107
108         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
109                 if(n->minmtu > n->maxmtu)
110                         n->minmtu = n->maxmtu;
111                 else
112                         n->maxmtu = n->minmtu;
113                 n->mtu = n->minmtu;
114                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
115                 n->mtuprobes = 31;
116         }
117
118         if(n->mtuprobes == 31) {
119                 timeout = pinginterval;
120                 goto end;
121         } else if(n->mtuprobes == 32) {
122                 timeout = pingtimeout;
123         }
124
125         for(int i = 0; i < 4 + localdiscovery; i++) {
126                 int len;
127
128                 if(i == 0) {
129                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
130                                 continue;
131                         len = n->maxmtu + 8;
132                 } else if(n->maxmtu <= n->minmtu) {
133                         len = n->maxmtu;
134                 } else {
135                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
136                 }
137
138                 if(len < 64)
139                         len = 64;
140
141                 vpn_packet_t packet;
142                 memset(packet.data, 0, 14);
143                 randomize(packet.data + 14, len - 14);
144                 packet.len = len;
145                 packet.priority = 0;
146                 n->status.send_locally = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
147
148                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
149
150                 send_udppacket(n, &packet);
151         }
152
153         n->status.send_locally = false;
154         n->probe_counter = 0;
155         gettimeofday(&n->probe_time, NULL);
156
157         /* Calculate the packet loss of incoming traffic by comparing the rate of
158            packets received to the rate with which the sequence number has increased.
159          */
160
161         if(n->received > n->prev_received)
162                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
163         else
164                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
165
166         n->prev_received_seqno = n->received_seqno;
167         n->prev_received = n->received;
168
169 end:
170         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
171 }
172
173 void send_mtu_probe(node_t *n) {
174         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
175         send_mtu_probe_handler(n);
176 }
177
178 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
179         if(!packet->data[0]) {
180                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
181
182                 /* It's a probe request, send back a reply */
183
184                 /* Type 2 probe replies were introduced in protocol 17.3 */
185                 if ((n->options >> 24) >= 3) {
186                         uint8_t* data = packet->data;
187                         *data++ = 2;
188                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
189                         struct timeval now;
190                         gettimeofday(&now, NULL);
191                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
192                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
193                         packet->len = data - packet->data;
194                 } else {
195                         /* Legacy protocol: n won't understand type 2 probe replies. */
196                         packet->data[0] = 1;
197                 }
198
199                 /* Temporarily set udp_confirmed, so that the reply is sent
200                    back exactly the way it came in. */
201
202                 bool udp_confirmed = n->status.udp_confirmed;
203                 n->status.udp_confirmed = true;
204                 send_udppacket(n, packet);
205                 n->status.udp_confirmed = udp_confirmed;
206         } else {
207                 length_t probelen = len;
208                 if (packet->data[0] == 2) {
209                         if (len < 3)
210                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
211                         else {
212                                 uint16_t probelen16; memcpy(&probelen16, packet->data + 1, 2); probelen = ntohs(probelen16);
213                         }
214                 }
215                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", packet->data[0], probelen, n->name, n->hostname);
216
217                 /* It's a valid reply: now we know bidirectional communication
218                    is possible using the address and socket that the reply
219                    packet used. */
220
221                 n->status.udp_confirmed = true;
222
223                 /* If we haven't established the PMTU yet, restart the discovery process. */
224
225                 if(n->mtuprobes > 30) {
226                         if (probelen == n->maxmtu + 8) {
227                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
228                                 n->maxmtu = MTU;
229                                 n->mtuprobes = 10;
230                                 return;
231                         }
232
233                         if(n->minmtu)
234                                 n->mtuprobes = 30;
235                         else
236                                 n->mtuprobes = 1;
237                 }
238
239                 /* If applicable, raise the minimum supported MTU */
240
241                 if(probelen > n->maxmtu)
242                         probelen = n->maxmtu;
243                 if(n->minmtu < probelen)
244                         n->minmtu = probelen;
245
246                 /* Calculate RTT and bandwidth.
247                    The RTT is the time between the MTU probe burst was sent and the first
248                    reply is received. The bandwidth is measured using the time between the
249                    arrival of the first and third probe reply (or type 2 probe requests).
250                  */
251
252                 struct timeval now, diff;
253                 gettimeofday(&now, NULL);
254                 timersub(&now, &n->probe_time, &diff);
255
256                 struct timeval probe_timestamp = now;
257                 if (packet->data[0] == 2 && packet->len >= 11) {
258                         uint32_t sec; memcpy(&sec, packet->data + 3, 4);
259                         uint32_t usec; memcpy(&usec, packet->data + 7, 4);
260                         probe_timestamp.tv_sec = ntohl(sec);
261                         probe_timestamp.tv_usec = ntohl(usec);
262                 }
263                 
264                 n->probe_counter++;
265
266                 if(n->probe_counter == 1) {
267                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
268                         n->probe_time = probe_timestamp;
269                 } else if(n->probe_counter == 3) {
270                         struct timeval probe_timestamp_diff;
271                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
272                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
273                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
274                 }
275         }
276 }
277
278 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
279         if(level == 0) {
280                 memcpy(dest, source, len);
281                 return len;
282         } else if(level == 10) {
283 #ifdef HAVE_LZO
284                 lzo_uint lzolen = MAXSIZE;
285                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
286                 return lzolen;
287 #else
288                 return -1;
289 #endif
290         } else if(level < 10) {
291 #ifdef HAVE_ZLIB
292                 unsigned long destlen = MAXSIZE;
293                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
294                         return destlen;
295                 else
296 #endif
297                         return -1;
298         } else {
299 #ifdef HAVE_LZO
300                 lzo_uint lzolen = MAXSIZE;
301                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
302                 return lzolen;
303 #else
304                 return -1;
305 #endif
306         }
307
308         return -1;
309 }
310
311 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
312         if(level == 0) {
313                 memcpy(dest, source, len);
314                 return len;
315         } else if(level > 9) {
316 #ifdef HAVE_LZO
317                 lzo_uint lzolen = MAXSIZE;
318                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
319                         return lzolen;
320                 else
321 #endif
322                         return -1;
323         }
324 #ifdef HAVE_ZLIB
325         else {
326                 unsigned long destlen = MAXSIZE;
327                 if(uncompress(dest, &destlen, source, len) == Z_OK)
328                         return destlen;
329                 else
330                         return -1;
331         }
332 #endif
333
334         return -1;
335 }
336
337 /* VPN packet I/O */
338
339 static void receive_packet(node_t *n, vpn_packet_t *packet) {
340         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
341                            packet->len, n->name, n->hostname);
342
343         n->in_packets++;
344         n->in_bytes += packet->len;
345
346         route(n, packet);
347 }
348
349 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
350         if(n->status.sptps)
351                 return sptps_verify_datagram(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
352
353         if(!digest_active(n->indigest) || inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest))
354                 return false;
355
356         return digest_verify(n->indigest, &inpkt->seqno, inpkt->len - digest_length(n->indigest), (const char *)&inpkt->seqno + inpkt->len - digest_length(n->indigest));
357 }
358
359 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
360         vpn_packet_t pkt1, pkt2;
361         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
362         int nextpkt = 0;
363         size_t outlen;
364
365         if(n->status.sptps) {
366                 if(!n->sptps.state) {
367                         if(!n->status.waitingforkey) {
368                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
369                                 send_req_key(n);
370                         } else {
371                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
372                         }
373                         return false;
374                 }
375                 if(!sptps_receive_data(&n->sptps, (char *)&inpkt->seqno, inpkt->len)) {
376                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
377                         return false;
378                 }
379                 return true;
380         }
381
382         if(!n->status.validkey) {
383                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
384                 return false;
385         }
386
387         /* Check packet length */
388
389         if(inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest)) {
390                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
391                                         n->name, n->hostname);
392                 return false;
393         }
394
395         /* Check the message authentication code */
396
397         if(digest_active(n->indigest)) {
398                 inpkt->len -= digest_length(n->indigest);
399                 if(!digest_verify(n->indigest, &inpkt->seqno, inpkt->len, (const char *)&inpkt->seqno + inpkt->len)) {
400                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
401                         return false;
402                 }
403         }
404         /* Decrypt the packet */
405
406         if(cipher_active(n->incipher)) {
407                 vpn_packet_t *outpkt = pkt[nextpkt++];
408                 outlen = MAXSIZE;
409
410                 if(!cipher_decrypt(n->incipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
411                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
412                         return false;
413                 }
414
415                 outpkt->len = outlen;
416                 inpkt = outpkt;
417         }
418
419         /* Check the sequence number */
420
421         inpkt->len -= sizeof inpkt->seqno;
422         uint32_t seqno;
423         memcpy(&seqno, inpkt->seqno, sizeof seqno);
424         seqno = ntohl(seqno);
425
426         if(replaywin) {
427                 if(seqno != n->received_seqno + 1) {
428                         if(seqno >= n->received_seqno + replaywin * 8) {
429                                 if(n->farfuture++ < replaywin >> 2) {
430                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
431                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
432                                         return false;
433                                 }
434                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
435                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
436                                 memset(n->late, 0, replaywin);
437                         } else if (seqno <= n->received_seqno) {
438                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
439                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
440                                                 n->name, n->hostname, seqno, n->received_seqno);
441                                         return false;
442                                 }
443                         } else {
444                                 for(int i = n->received_seqno + 1; i < seqno; i++)
445                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
446                         }
447                 }
448
449                 n->farfuture = 0;
450                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
451         }
452
453         if(seqno > n->received_seqno)
454                 n->received_seqno = seqno;
455
456         n->received++;
457
458         if(n->received_seqno > MAX_SEQNO)
459                 regenerate_key();
460
461         /* Decompress the packet */
462
463         length_t origlen = inpkt->len;
464
465         if(n->incompression) {
466                 vpn_packet_t *outpkt = pkt[nextpkt++];
467
468                 if((outpkt->len = uncompress_packet(outpkt->data, inpkt->data, inpkt->len, n->incompression)) < 0) {
469                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
470                                                  n->name, n->hostname);
471                         return false;
472                 }
473
474                 inpkt = outpkt;
475
476                 origlen -= MTU/64 + 20;
477         }
478
479         inpkt->priority = 0;
480
481         if(!inpkt->data[12] && !inpkt->data[13])
482                 mtu_probe_h(n, inpkt, origlen);
483         else
484                 receive_packet(n, inpkt);
485         return true;
486 }
487
488 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
489         vpn_packet_t outpkt;
490
491         if(len > sizeof outpkt.data)
492                 return;
493
494         outpkt.len = len;
495         if(c->options & OPTION_TCPONLY)
496                 outpkt.priority = 0;
497         else
498                 outpkt.priority = -1;
499         memcpy(outpkt.data, buffer, len);
500
501         receive_packet(c->node, &outpkt);
502 }
503
504 static bool try_sptps(node_t *n) {
505         if(n->status.validkey)
506                 return true;
507
508         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
509
510         if(!n->status.waitingforkey)
511                 send_req_key(n);
512         else if(n->last_req_key + 10 < now.tv_sec) {
513                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
514                 sptps_stop(&n->sptps);
515                 n->status.waitingforkey = false;
516                 send_req_key(n);
517         }
518
519         return false;
520 }
521
522 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
523         if (!try_sptps(n))
524                 return;
525
526         uint8_t type = 0;
527         int offset = 0;
528
529         if(!(origpkt->data[12] | origpkt->data[13])) {
530                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)origpkt->data, origpkt->len);
531                 return;
532         }
533
534         if(routing_mode == RMODE_ROUTER)
535                 offset = 14;
536         else
537                 type = PKT_MAC;
538
539         if(origpkt->len < offset)
540                 return;
541
542         vpn_packet_t outpkt;
543
544         if(n->outcompression) {
545                 int len = compress_packet(outpkt.data + offset, origpkt->data + offset, origpkt->len - offset, n->outcompression);
546                 if(len < 0) {
547                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
548                 } else if(len < origpkt->len - offset) {
549                         outpkt.len = len + offset;
550                         origpkt = &outpkt;
551                         type |= PKT_COMPRESSED;
552                 }
553         }
554
555         sptps_send_record(&n->sptps, type, (char *)origpkt->data + offset, origpkt->len - offset);
556         return;
557 }
558
559 static void adapt_socket(const sockaddr_t *sa, int *sock) {
560         /* Make sure we have a suitable socket for the chosen address */
561         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
562                 for(int i = 0; i < listen_sockets; i++) {
563                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
564                                 *sock = i;
565                                 break;
566                         }
567                 }
568         }
569 }
570
571 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
572         /* Latest guess */
573         *sa = &n->address;
574         *sock = n->sock;
575
576         /* If the UDP address is confirmed, use it. */
577         if(n->status.udp_confirmed)
578                 return;
579
580         /* Send every third packet to n->address; that could be set
581            to the node's reflexive UDP address discovered during key
582            exchange. */
583
584         static int x = 0;
585         if(++x >= 3) {
586                 x = 0;
587                 return;
588         }
589
590         /* Otherwise, address are found in edges to this node.
591            So we pick a random edge and a random socket. */
592
593         int i = 0;
594         int j = rand() % n->edge_tree->count;
595         edge_t *candidate = NULL;
596
597         for splay_each(edge_t, e, n->edge_tree) {
598                 if(i++ == j) {
599                         candidate = e->reverse;
600                         break;
601                 }
602         }
603
604         if(candidate) {
605                 *sa = &candidate->address;
606                 *sock = rand() % listen_sockets;
607         }
608
609         adapt_socket(*sa, sock);
610 }
611
612 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
613         *sa = NULL;
614
615         /* Pick one of the edges from this node at random, then use its local address. */
616
617         int i = 0;
618         int j = rand() % n->edge_tree->count;
619         edge_t *candidate = NULL;
620
621         for splay_each(edge_t, e, n->edge_tree) {
622                 if(i++ == j) {
623                         candidate = e;
624                         break;
625                 }
626         }
627
628         if (candidate && candidate->local_address.sa.sa_family) {
629                 *sa = &candidate->local_address;
630                 *sock = rand() % listen_sockets;
631                 adapt_socket(*sa, sock);
632         }
633 }
634
635 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
636         vpn_packet_t pkt1, pkt2;
637         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
638         vpn_packet_t *inpkt = origpkt;
639         int nextpkt = 0;
640         vpn_packet_t *outpkt;
641         int origlen = origpkt->len;
642         size_t outlen;
643 #if defined(SOL_IP) && defined(IP_TOS)
644         static int priority = 0;
645         int origpriority = origpkt->priority;
646 #endif
647
648         if(!n->status.reachable) {
649                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
650                 return;
651         }
652
653         if(n->status.sptps)
654                 return send_sptps_packet(n, origpkt);
655
656         /* Make sure we have a valid key */
657
658         if(!n->status.validkey) {
659                 logger(DEBUG_TRAFFIC, LOG_INFO,
660                                    "No valid key known yet for %s (%s), forwarding via TCP",
661                                    n->name, n->hostname);
662
663                 if(n->last_req_key + 10 <= now.tv_sec) {
664                         send_req_key(n);
665                         n->last_req_key = now.tv_sec;
666                 }
667
668                 send_tcppacket(n->nexthop->connection, origpkt);
669
670                 return;
671         }
672
673         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (inpkt->data[12] | inpkt->data[13])) {
674                 logger(DEBUG_TRAFFIC, LOG_INFO,
675                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
676                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
677
678                 if(n != n->nexthop)
679                         send_packet(n->nexthop, origpkt);
680                 else
681                         send_tcppacket(n->nexthop->connection, origpkt);
682
683                 return;
684         }
685
686         /* Compress the packet */
687
688         if(n->outcompression) {
689                 outpkt = pkt[nextpkt++];
690
691                 if((outpkt->len = compress_packet(outpkt->data, inpkt->data, inpkt->len, n->outcompression)) < 0) {
692                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
693                                    n->name, n->hostname);
694                         return;
695                 }
696
697                 inpkt = outpkt;
698         }
699
700         /* Add sequence number */
701
702         uint32_t seqno = htonl(++(n->sent_seqno));
703         memcpy(inpkt->seqno, &seqno, sizeof inpkt->seqno);
704         inpkt->len += sizeof inpkt->seqno;
705
706         /* Encrypt the packet */
707
708         if(cipher_active(n->outcipher)) {
709                 outpkt = pkt[nextpkt++];
710                 outlen = MAXSIZE;
711
712                 if(!cipher_encrypt(n->outcipher, inpkt->seqno, inpkt->len, outpkt->seqno, &outlen, true)) {
713                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
714                         goto end;
715                 }
716
717                 outpkt->len = outlen;
718                 inpkt = outpkt;
719         }
720
721         /* Add the message authentication code */
722
723         if(digest_active(n->outdigest)) {
724                 if(!digest_create(n->outdigest, inpkt->seqno, inpkt->len, inpkt->seqno + inpkt->len)) {
725                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
726                         goto end;
727                 }
728
729                 inpkt->len += digest_length(n->outdigest);
730         }
731
732         /* Send the packet */
733
734         const sockaddr_t *sa = NULL;
735         int sock;
736
737         if(n->status.send_locally)
738                 choose_local_address(n, &sa, &sock);
739         if(!sa)
740                 choose_udp_address(n, &sa, &sock);
741
742 #if defined(SOL_IP) && defined(IP_TOS)
743         if(priorityinheritance && origpriority != priority
744            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
745                 priority = origpriority;
746                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
747                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
748                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
749         }
750 #endif
751
752         if(sendto(listen_socket[sock].udp.fd, inpkt->seqno, inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
753                 if(sockmsgsize(sockerrno)) {
754                         if(n->maxmtu >= origlen)
755                                 n->maxmtu = origlen - 1;
756                         if(n->mtu >= origlen)
757                                 n->mtu = origlen - 1;
758                 } else
759                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
760         }
761
762 end:
763         origpkt->len = origlen;
764 }
765
766 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
767         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
768         bool direct = from == myself && to == relay;
769         bool relay_supported = (relay->options >> 24) >= 4;
770         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
771
772         /* We don't really need the relay's key, but we need to establish a UDP tunnel with it and discover its MTU. */
773         if (!direct && relay_supported && !tcponly)
774                 try_sptps(relay);
775
776         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
777            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
778                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
779
780         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
781                 char buf[len * 4 / 3 + 5];
782                 b64encode(data, buf, len);
783                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
784                    to ensure we get to learn the reflexive UDP address. */
785                 if(from == myself && !to->status.validkey) {
786                         to->incompression = myself->incompression;
787                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
788                 } else {
789                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
790                 }
791         }
792
793         size_t overhead = 0;
794         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
795         char buf[len + overhead]; char* buf_ptr = buf;
796         if(relay_supported) {
797                 if(direct) {
798                         /* Inform the recipient that this packet was sent directly. */
799                         node_id_t nullid = {};
800                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
801                 } else {
802                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
803                 }
804                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
805
806         }
807         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
808         memcpy(buf_ptr, data, len); buf_ptr += len;
809
810         const sockaddr_t *sa = NULL;
811         int sock;
812         if(relay->status.send_locally)
813                 choose_local_address(relay, &sa, &sock);
814         if(!sa)
815                 choose_udp_address(relay, &sa, &sock);
816         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
817         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
818                 if(sockmsgsize(sockerrno)) {
819                         // Compensate for SPTPS overhead
820                         len -= SPTPS_DATAGRAM_OVERHEAD;
821                         if(relay->maxmtu >= len)
822                                 relay->maxmtu = len - 1;
823                         if(relay->mtu >= len)
824                                 relay->mtu = len - 1;
825                 } else {
826                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
827                         return false;
828                 }
829         }
830
831         return true;
832 }
833
834 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
835         return send_sptps_data_priv(handle, myself, type, data, len);
836 }
837
838 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
839         node_t *from = handle;
840
841         if(type == SPTPS_HANDSHAKE) {
842                 if(!from->status.validkey) {
843                         from->status.validkey = true;
844                         from->status.waitingforkey = false;
845                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
846                 }
847                 return true;
848         }
849
850         if(len > MTU) {
851                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
852                 return false;
853         }
854
855         vpn_packet_t inpkt;
856
857         if(type == PKT_PROBE) {
858                 inpkt.len = len;
859                 memcpy(inpkt.data, data, len);
860                 mtu_probe_h(from, &inpkt, len);
861                 return true;
862         }
863
864         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
865                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
866                 return false;
867         }
868
869         /* Check if we have the headers we need */
870         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
871                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
872                 return false;
873         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
874                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
875         }
876
877         int offset = (type & PKT_MAC) ? 0 : 14;
878         if(type & PKT_COMPRESSED) {
879                 length_t ulen = uncompress_packet(inpkt.data + offset, (const uint8_t *)data, len, from->incompression);
880                 if(ulen < 0) {
881                         return false;
882                 } else {
883                         inpkt.len = ulen + offset;
884                 }
885                 if(inpkt.len > MAXSIZE)
886                         abort();
887         } else {
888                 memcpy(inpkt.data + offset, data, len);
889                 inpkt.len = len + offset;
890         }
891
892         /* Generate the Ethernet packet type if necessary */
893         if(offset) {
894                 switch(inpkt.data[14] >> 4) {
895                         case 4:
896                                 inpkt.data[12] = 0x08;
897                                 inpkt.data[13] = 0x00;
898                                 break;
899                         case 6:
900                                 inpkt.data[12] = 0x86;
901                                 inpkt.data[13] = 0xDD;
902                                 break;
903                         default:
904                                 logger(DEBUG_TRAFFIC, LOG_ERR,
905                                                    "Unknown IP version %d while reading packet from %s (%s)",
906                                                    inpkt.data[14] >> 4, from->name, from->hostname);
907                                 return false;
908                 }
909         }
910
911         receive_packet(from, &inpkt);
912         return true;
913 }
914
915 /*
916   send a packet to the given vpn ip.
917 */
918 void send_packet(node_t *n, vpn_packet_t *packet) {
919         node_t *via;
920
921         if(n == myself) {
922                 if(overwrite_mac)
923                          memcpy(packet->data, mymac.x, ETH_ALEN);
924                 n->out_packets++;
925                 n->out_bytes += packet->len;
926                 devops.write(packet);
927                 return;
928         }
929
930         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
931                            packet->len, n->name, n->hostname);
932
933         if(!n->status.reachable) {
934                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
935                                    n->name, n->hostname);
936                 return;
937         }
938
939         n->out_packets++;
940         n->out_bytes += packet->len;
941
942         if(n->status.sptps) {
943                 send_sptps_packet(n, packet);
944                 return;
945         }
946
947         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
948
949         if(via != n)
950                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
951                            n->name, via->name, n->via->hostname);
952
953         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
954                 if(!send_tcppacket(via->connection, packet))
955                         terminate_connection(via->connection, true);
956         } else
957                 send_udppacket(via, packet);
958 }
959
960 /* Broadcast a packet using the minimum spanning tree */
961
962 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
963         // Always give ourself a copy of the packet.
964         if(from != myself)
965                 send_packet(myself, packet);
966
967         // In TunnelServer mode, do not forward broadcast packets.
968         // The MST might not be valid and create loops.
969         if(tunnelserver || broadcast_mode == BMODE_NONE)
970                 return;
971
972         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
973                            packet->len, from->name, from->hostname);
974
975         switch(broadcast_mode) {
976                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
977                 // This guarantees all nodes receive the broadcast packet, and
978                 // usually distributes the sending of broadcast packets over all nodes.
979                 case BMODE_MST:
980                         for list_each(connection_t, c, connection_list)
981                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
982                                         send_packet(c->node, packet);
983                         break;
984
985                 // In direct mode, we send copies to each node we know of.
986                 // However, this only reaches nodes that can be reached in a single hop.
987                 // We don't have enough information to forward broadcast packets in this case.
988                 case BMODE_DIRECT:
989                         if(from != myself)
990                                 break;
991
992                         for splay_each(node_t, n, node_tree)
993                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
994                                         send_packet(n, packet);
995                         break;
996
997                 default:
998                         break;
999         }
1000 }
1001
1002 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1003         node_t *n = NULL;
1004         bool hard = false;
1005         static time_t last_hard_try = 0;
1006
1007         for splay_each(edge_t, e, edge_weight_tree) {
1008                 if(!e->to->status.reachable || e->to == myself)
1009                         continue;
1010
1011                 if(sockaddrcmp_noport(from, &e->address)) {
1012                         if(last_hard_try == now.tv_sec)
1013                                 continue;
1014                         hard = true;
1015                 }
1016
1017                 if(!try_mac(e->to, pkt))
1018                         continue;
1019
1020                 n = e->to;
1021                 break;
1022         }
1023
1024         if(hard)
1025                 last_hard_try = now.tv_sec;
1026
1027         last_hard_try = now.tv_sec;
1028         return n;
1029 }
1030
1031 void handle_incoming_vpn_data(void *data, int flags) {
1032         listen_socket_t *ls = data;
1033         vpn_packet_t pkt;
1034         char *hostname;
1035         sockaddr_t from = {{0}};
1036         socklen_t fromlen = sizeof from;
1037         node_t *n = NULL;
1038         node_t *to = myself;
1039         int len;
1040
1041         len = recvfrom(ls->udp.fd, &pkt.dstid, MAXSIZE, 0, &from.sa, &fromlen);
1042
1043         if(len <= 0 || len > MAXSIZE) {
1044                 if(!sockwouldblock(sockerrno))
1045                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1046                 return;
1047         }
1048
1049         pkt.len = len;
1050
1051         sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
1052
1053         bool direct = false;
1054         if(len >= sizeof pkt.dstid + sizeof pkt.srcid) {
1055                 n = lookup_node_id(&pkt.srcid);
1056                 if(n) {
1057                         node_id_t nullid = {};
1058                         if(memcmp(&pkt.dstid, &nullid, sizeof nullid) == 0) {
1059                                 /* A zero dstid is used to indicate a direct, non-relayed packet. */
1060                                 direct = true;
1061                         } else {
1062                                 to = lookup_node_id(&pkt.dstid);
1063                                 if(!to) {
1064                                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet presumably sent by %s (%s) but with unknown destination ID", n->name, n->hostname);
1065                                         return;
1066                                 }
1067                         }
1068                         pkt.len -= sizeof pkt.dstid + sizeof pkt.srcid;
1069                 }
1070         }
1071
1072         if(to != myself) {
1073                 /* We are being asked to relay this packet. */
1074
1075                 /* Don't allow random strangers to relay through us. Note that we check for *any* known address since we are not necessarily the first relay. */
1076                 if (!lookup_node_udp(&from)) {
1077                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Refusing to relay packet from (presumably) %s (%s) to (presumably) %s (%s) because the packet comes from an unknown address", n->name, n->hostname, to->name, to->hostname);
1078                         return;
1079                 }
1080
1081                 send_sptps_data_priv(to, n, 0, pkt.seqno, pkt.len);
1082                 return;
1083         }
1084
1085         if(!n) {
1086                 /* Most likely an old-style packet without node IDs. */
1087                 direct = true;
1088                 memmove(pkt.seqno, &pkt.dstid, sizeof pkt - offsetof(vpn_packet_t, seqno));
1089                 n = lookup_node_udp(&from);
1090         }
1091
1092         if(!n)
1093                 n = try_harder(&from, &pkt);
1094
1095         if(!n) {
1096                 if(debug_level >= DEBUG_PROTOCOL) {
1097                         hostname = sockaddr2hostname(&from);
1098                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1099                         free(hostname);
1100                 }
1101                 return;
1102         }
1103
1104         if(!receive_udppacket(n, &pkt))
1105                 return;
1106
1107         n->sock = ls - listen_socket;
1108         if(direct && sockaddrcmp(&from, &n->address))
1109                 update_node_udp(n, &from);
1110 }
1111
1112 void handle_device_data(void *data, int flags) {
1113         vpn_packet_t packet;
1114
1115         packet.priority = 0;
1116
1117         if(devops.read(&packet)) {
1118                 myself->in_packets++;
1119                 myself->in_bytes += packet.len;
1120                 route(myself, &packet);
1121         }
1122 }