Move repeating MIN/MAX macros into dropin.h.
[tinc] src/event.c
/*
    event.c -- I/O, timeout and signal event handling
    Copyright (C) 2012-2021 Guus Sliepen <guus@tinc-vpn.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include "system.h"
#include "dropin.h"

#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif

#include "event.h"
#include "utils.h"
#include "net.h"

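/* One of three polling backends is chosen at build time: epoll(7) where
   <sys/epoll.h> is available, select(2) on other POSIX systems, and WSA
   event objects on Windows (MinGW). */
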
struct timeval now;

#ifndef HAVE_MINGW

#ifdef HAVE_SYS_EPOLL_H
static int epollset = 0;
#else
static fd_set readfds;
static fd_set writefds;
#endif

#else
static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
static DWORD event_count = 0;
#endif

static bool running;

#ifdef HAVE_SYS_EPOLL_H
static inline int event_epoll_init(void) {
        /* NOTE: the size hint of 1024 only matters on ancient (pre-2.6.27)
           kernels; newer kernels ignore it and size the interest list
           dynamically. epoll_create1() would be preferable, but would drop
           support for those old kernels. */
        return epoll_create(1024);
}
#endif

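/* Ordering for io_tree: by file descriptor on POSIX, by WSAEVENT handle
   on Windows, where an io_t added via io_add_event() has no fd at all. */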
static int io_compare(const io_t *a, const io_t *b) {
#ifndef HAVE_MINGW
        return a->fd - b->fd;
#else

        if(a->event < b->event) {
                return -1;
        }

        if(a->event > b->event) {
                return 1;
        }

        return 0;
#endif
}

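/* Order timeouts by expiry time, breaking ties by address so that distinct
   timeouts with identical expiry times can coexist in the tree. */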
static int timeout_compare(const timeout_t *a, const timeout_t *b) {
        struct timeval diff;
        timersub(&a->tv, &b->tv, &diff);

        if(diff.tv_sec < 0) {
                return -1;
        }

        if(diff.tv_sec > 0) {
                return 1;
        }

        if(diff.tv_usec < 0) {
                return -1;
        }

        if(diff.tv_usec > 0) {
                return 1;
        }

        if(a < b) {
                return -1;
        }

        if(a > b) {
                return 1;
        }

        return 0;
}

static splay_tree_t io_tree = {.compare = (splay_compare_t)io_compare};
static splay_tree_t timeout_tree = {.compare = (splay_compare_t)timeout_compare};

void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
        if(io->cb) {
                return;
        }

        io->fd = fd;
#ifdef HAVE_MINGW

        if(io->fd != -1) {
                io->event = WSACreateEvent();

                if(io->event == WSA_INVALID_EVENT) {
                        abort();
                }
        }

        event_count++;
#endif
        io->cb = cb;
        io->data = data;
        io->node.data = io;

        io_set(io, flags);

#ifndef HAVE_SYS_EPOLL_H

        if(!splay_insert_node(&io_tree, &io->node)) {
                abort();
        }

#endif
}
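
/*
   Typical usage, as a sketch (handle_input and sock are hypothetical names,
   not part of this API):

       static void handle_input(void *data, int flags) {
               // read from the socket associated with data
       }

       static io_t io;
       io_add(&io, handle_input, NULL, sock, IO_READ);
*/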

#ifdef HAVE_MINGW
void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
        io->event = event;
        io_add(io, cb, data, -1, 0);
}
#endif

void io_set(io_t *io, int flags) {
#ifdef HAVE_SYS_EPOLL_H

        if(!epollset) {
                epollset = event_epoll_init();
        }

#endif

        if(flags == io->flags) {
                return;
        }

        io->flags = flags;

        if(io->fd == -1) {
                return;
        }

#ifndef HAVE_MINGW
#ifdef HAVE_SYS_EPOLL_H
        epoll_ctl(epollset, EPOLL_CTL_DEL, io->fd, NULL);

        struct epoll_event ev = {
                .events = 0,
                .data.ptr = io,
        };

        if(flags & IO_READ) {
                ev.events |= EPOLLIN;
        }

        if(flags & IO_WRITE) {
                ev.events |= EPOLLOUT;
        } else if(ev.events == 0) {
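                /* Nothing to watch: the fd is already out of the epoll set,
                   so just bump the generation to invalidate any iteration
                   event_loop() has in progress. */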
                io_tree.generation++;
                return;
        }

        if(epoll_ctl(epollset, EPOLL_CTL_ADD, io->fd, &ev) < 0) {
                perror("epoll_ctl_add");
        }

#else

        if(flags & IO_READ) {
                FD_SET(io->fd, &readfds);
        } else {
                FD_CLR(io->fd, &readfds);
        }

        if(flags & IO_WRITE) {
                FD_SET(io->fd, &writefds);
        } else {
                FD_CLR(io->fd, &writefds);
        }

#endif
#else
        long events = 0;

        if(flags & IO_WRITE) {
                events |= WRITE_EVENTS;
        }

        if(flags & IO_READ) {
                events |= READ_EVENTS;
        }

        if(WSAEventSelect(io->fd, io->event, events) != 0) {
                abort();
        }

#endif
}

void io_del(io_t *io) {
        if(!io->cb) {
                return;
        }

        io_set(io, 0);
#ifdef HAVE_MINGW

        if(io->fd != -1 && WSACloseEvent(io->event) == FALSE) {
                abort();
        }

        event_count--;
#endif

#ifndef HAVE_SYS_EPOLL_H
        splay_unlink_node(&io_tree, &io->node);
#endif
        io->cb = NULL;
}

void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
        timeout->cb = cb;
        timeout->data = data;
        timeout->node.data = timeout;

        timeout_set(timeout, tv);
}
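
/*
   Sketch of a periodic one-second timer (periodic_cb and the interval are
   hypothetical; a callback can re-arm its own timeout with timeout_set()):

       static timeout_t t;

       static void periodic_cb(void *data) {
               (void)data;
               timeout_set(&t, &(struct timeval) {1, 0});
       }

       timeout_add(&t, periodic_cb, NULL, &(struct timeval) {1, 0});
*/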

void timeout_set(timeout_t *timeout, struct timeval *tv) {
        if(timerisset(&timeout->tv)) {
                splay_unlink_node(&timeout_tree, &timeout->node);
        }

        if(!now.tv_sec) {
                gettimeofday(&now, NULL);
        }

        timeradd(&now, tv, &timeout->tv);

        if(!splay_insert_node(&timeout_tree, &timeout->node)) {
                abort();
        }
}

void timeout_del(timeout_t *timeout) {
        if(!timeout->cb) {
                return;
        }

        splay_unlink_node(&timeout_tree, &timeout->node);
        timeout->cb = NULL;
        timeout->tv = (struct timeval) {
                0, 0
        };
}

#ifndef HAVE_MINGW

// From Matz's Ruby
#ifndef NSIG
# define NSIG (_SIGMAX + 1)      /* For QNX */
#endif

static io_t signalio;
static int pipefd[2] = {-1, -1};
static signal_t *signal_handle[NSIG + 1] = {};

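/* Self-pipe trick: the handler below is async-signal-safe because it only
   calls write(); the signal number is read back and dispatched from the
   main loop by signalio_handler(). */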
static void signal_handler(int signum) {
        unsigned char num = signum;
        write(pipefd[1], &num, 1);
}

static void signalio_handler(void *data, int flags) {
        (void)data;
        (void)flags;
        unsigned char signum;

        if(read(pipefd[0], &signum, 1) != 1) {
                return;
        }

        signal_t *sig = signal_handle[signum];

        if(sig) {
                sig->cb(sig->data);
        }
}

static void pipe_init(void) {
        if(!pipe(pipefd)) {
                io_add(&signalio, signalio_handler, NULL, pipefd[0], IO_READ);
        }
}

void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum) {
        if(sig->cb) {
                return;
        }

        sig->signum = signum;
        sig->cb = cb;
        sig->data = data;

        if(pipefd[0] == -1) {
                pipe_init();
        }

        signal(signum, signal_handler);

        signal_handle[signum] = sig;
}
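
/*
   Sketch: exit the event loop on SIGINT (sigint_cb is a hypothetical name;
   event_exit() is defined at the bottom of this file):

       static void sigint_cb(void *data) {
               (void)data;
               event_exit();
       }

       static signal_t sig;
       signal_add(&sig, sigint_cb, NULL, SIGINT);
*/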

void signal_del(signal_t *sig) {
        if(!sig->cb) {
                return;
        }

        signal(sig->signum, SIG_DFL);

        signal_handle[sig->signum] = NULL;
        sig->cb = NULL;
}
#endif

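/* Fire all expired timeouts and return the interval until the next one, or
   NULL if no timeouts remain. A callback may re-arm its timeout with
   timeout_set(); only timeouts still in the past afterwards are deleted. */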
static struct timeval *timeout_execute(struct timeval *diff) {
        gettimeofday(&now, NULL);
        struct timeval *tv = NULL;

        while(timeout_tree.head) {
                timeout_t *timeout = timeout_tree.head->data;
                timersub(&timeout->tv, &now, diff);

                if(diff->tv_sec < 0) {
                        timeout->cb(timeout->data);

                        if(timercmp(&timeout->tv, &now, <)) {
                                timeout_del(timeout);
                        }
                } else {
                        tv = diff;
                        break;
                }
        }

        return tv;
}

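/* Run the event loop until event_exit() is called. Returns true on a clean
   exit and false if polling or event handling fails. */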
bool event_loop(void) {
        running = true;

#ifndef HAVE_MINGW

#ifdef HAVE_SYS_EPOLL_H

        if(!epollset) {
                epollset = event_epoll_init();
        }

#else
        fd_set readable;
        fd_set writable;
#endif

        while(running) {
                struct timeval diff;
                struct timeval *tv = timeout_execute(&diff);
#ifndef HAVE_SYS_EPOLL_H
                memcpy(&readable, &readfds, sizeof(readable));
                memcpy(&writable, &writefds, sizeof(writable));
#endif

#ifdef HAVE_SYS_EPOLL_H
                struct epoll_event events[EPOLL_MAX_EVENTS_PER_LOOP];
                /* A NULL tv means no timeouts are pending; tell epoll_wait()
                   to block indefinitely in that case. */
                long timeout = tv ? (tv->tv_sec * 1000) + (tv->tv_usec / 1000) : -1;

                if(timeout > INT_MAX) {
                        timeout = INT_MAX;
                }

                int n = epoll_wait(epollset, events, EPOLL_MAX_EVENTS_PER_LOOP, (int)timeout);
#else
                int maxfds = 0;

                if(io_tree.tail) {
                        io_t *last = io_tree.tail->data;
                        maxfds = last->fd + 1;
                }

                int n = select(maxfds, &readable, &writable, NULL, tv);
#endif

                if(n < 0) {
                        if(sockwouldblock(sockerrno)) {
                                continue;
                        } else {
                                return false;
                        }
                }

                if(!n) {
                        continue;
                }

                unsigned int curgen = io_tree.generation;

#ifdef HAVE_SYS_EPOLL_H

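                /* Callbacks may delete io_t objects, which can leave stale
                   pointers in events[]; whenever the tree's generation
                   changes we stop and re-poll rather than risk using them. */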
                for(int i = 0; i < n; i++) {
                        io_t *io = events[i].data.ptr;

                        if((events[i].events & EPOLLOUT) && (io->flags & IO_WRITE)) {
                                io->cb(io->data, IO_WRITE);
                        }

                        if(curgen != io_tree.generation) {
                                break;
                        }

                        if((events[i].events & EPOLLIN) && (io->flags & IO_READ)) {
                                io->cb(io->data, IO_READ);
                        }

                        if(curgen != io_tree.generation) {
                                break;
                        }
                }

#else

                for splay_each(io_t, io, &io_tree) {
                        if(FD_ISSET(io->fd, &writable)) {
                                io->cb(io->data, IO_WRITE);
                        } else if(FD_ISSET(io->fd, &readable)) {
                                io->cb(io->data, IO_READ);
                        } else {
                                continue;
                        }

                        /*
                                There are scenarios in which a callback removes another io_t from the tree
                                (e.g. closing a double connection). Since splay_each does not support that,
                                we need to exit the loop if that happens. That's okay, since any remaining
                                events will get picked up by the next select() call.
                        */
                        if(curgen != io_tree.generation) {
                                break;
                        }
                }

#endif
        }

#else

        while(running) {
                struct timeval diff;
                struct timeval *tv = timeout_execute(&diff);
                DWORD timeout_ms = tv ? (DWORD)(tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;

                if(!event_count) {
                        Sleep(timeout_ms);
                        continue;
                }

                /*
                   For some reason, Microsoft decided to make the FD_WRITE event edge-triggered instead of level-triggered,
                   which is the opposite of what select() does. In practice, that means that once an FD_WRITE event fires,
                   it will not fire again until a send() returns EWOULDBLOCK. Since the semantics of this event loop
                   are that write events are level-triggered (i.e. they keep firing until the socket's send buffer is full),
                   we emulate that behaviour by firing IO_WRITE for every io that is still writable.

                   Technically, FD_CLOSE has the same problem, but that is okay because user code does not rely on
                   this event firing again if it is ignored.
                */
                unsigned int curgen = io_tree.generation;

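                /* A zero-byte send() serves as a cheap writability probe: it
                   succeeds only while the socket can accept data. */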
                for splay_each(io_t, io, &io_tree) {
                        if((io->flags & IO_WRITE) && send(io->fd, NULL, 0, 0) == 0) {
                                io->cb(io->data, IO_WRITE);

                                if(curgen != io_tree.generation) {
                                        break;
                                }
                        }
                }

                if(event_count > WSA_MAXIMUM_WAIT_EVENTS) {
                        WSASetLastError(WSA_INVALID_PARAMETER);
                        return false;
                }

                WSAEVENT events[WSA_MAXIMUM_WAIT_EVENTS];
                io_t *io_map[WSA_MAXIMUM_WAIT_EVENTS];
                DWORD event_index = 0;

                for splay_each(io_t, io, &io_tree) {
                        events[event_index] = io->event;
                        io_map[event_index] = io;
                        event_index++;
                }

                /*
                 * If the generation number changes due to event addition
                 * or removal by a callback, we restart the loop.
                 */
                curgen = io_tree.generation;

                for(DWORD event_offset = 0; event_offset < event_count;) {
                        DWORD result = WSAWaitForMultipleEvents(event_count - event_offset, &events[event_offset], FALSE, timeout_ms, FALSE);

                        if(result == WSA_WAIT_TIMEOUT) {
                                break;
                        }

                        if(result < WSA_WAIT_EVENT_0 || result >= WSA_WAIT_EVENT_0 + event_count - event_offset) {
                                return false;
                        }

                        /* Look up the io_t in the map by index. */
                        event_index = result - WSA_WAIT_EVENT_0 + event_offset;
                        io_t *io = io_map[event_index];

                        if(io->fd == -1) {
                                io->cb(io->data, 0);

                                if(curgen != io_tree.generation) {
                                        break;
                                }
                        } else {
                                WSANETWORKEVENTS network_events;

                                if(WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) {
                                        return false;
                                }

                                if(network_events.lNetworkEvents & READ_EVENTS) {
                                        io->cb(io->data, IO_READ);

                                        if(curgen != io_tree.generation) {
                                                break;
                                        }
                                }

                                /*
                                    The fd might be writable as well. However, if we already fired the read callback, that
                                    callback might have deleted the io (e.g. through terminate_connection()), so we cannot
                                    fire the write callback here. Instead, we loop back and let the writability check above
                                    handle it.
                                 */
                        }

                        /* Continue checking the rest of the events. */
                        event_offset = event_index + 1;

                        /* Just poll the next time through. */
                        timeout_ms = 0;
                }
        }

#endif

        return true;
}

void event_exit(void) {
        running = false;
}