/*
    event.c -- I/O, timeout and signal event handling
    Copyright (C) 2012-2021 Guus Sliepen <guus@tinc-vpn.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "system.h"

#include <limits.h>

#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif

#include "event.h"
struct timeval now;
static bool running;

#ifndef HAVE_MINGW

#ifdef HAVE_SYS_EPOLL_H
/* How many ready events to pull out of epoll per epoll_wait() call. */
#define EPOLL_MAX_EVENTS_PER_LOOP 32
static int epollset = 0;
#else
static fd_set readfds;
static fd_set writefds;
#endif

#else

static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
static DWORD event_count = 0;

#endif
#ifdef HAVE_SYS_EPOLL_H
static inline int event_epoll_init(void) {
	/* NOTE: 1024 limit is only used on ancient (pre 2.6.27) kernels.
	   Decent kernels will ignore this value making it unlimited.
	   epoll_create1 might be better, but these kernels would not be
	   supported in that case. */
	return epoll_create(1024);
}
#endif
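/*
   For illustration only, a sketch of the epoll_create1() variant mentioned
   above. It is not used here because it assumes Linux >= 2.6.27, which is
   exactly the requirement the note wants to avoid:
*/
#if 0
static inline int event_epoll_init(void) {
	/* epoll_create1() ignores the old size hint and takes flags instead;
	   EPOLL_CLOEXEC marks the descriptor close-on-exec so it does not
	   leak into child processes. */
	return epoll_create1(EPOLL_CLOEXEC);
}
#endif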
static int io_compare(const io_t *a, const io_t *b) {
#ifndef HAVE_MINGW
	return a->fd - b->fd;
#else

	if(a->event < b->event) {
		return -1;
	}

	if(a->event > b->event) {
		return 1;
	}

	return 0;
#endif
}
static int timeout_compare(const timeout_t *a, const timeout_t *b) {
	struct timeval diff;
	timersub(&a->tv, &b->tv, &diff);

	if(diff.tv_sec < 0) {
		return -1;
	}

	if(diff.tv_sec > 0) {
		return 1;
	}

	if(diff.tv_usec < 0) {
		return -1;
	}

	if(diff.tv_usec > 0) {
		return 1;
	}

	/* Distinct timeouts with identical deadlines must still compare
	   unequal, otherwise the splay tree would treat one as a duplicate. */
	if(a < b) {
		return -1;
	}

	if(a > b) {
		return 1;
	}

	return 0;
}
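/*
   Both trees hold intrusive nodes. io_tree is ordered by fd (by WSAEVENT
   handle on Windows), so its tail directly yields the highest fd needed for
   select()'s nfds argument. timeout_tree is ordered by absolute deadline, so
   its head is always the next timeout due to expire.
*/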
static splay_tree_t io_tree = {.compare = (splay_compare_t)io_compare};
static splay_tree_t timeout_tree = {.compare = (splay_compare_t)timeout_compare};
void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
	if(io->cb) {
		return;
	}

	io->fd = fd;
#ifdef HAVE_MINGW

	if(io->fd != -1) {
		io->event = WSACreateEvent();

		if(io->event == WSA_INVALID_EVENT) {
			abort();
		}
	}

	event_count++;
#endif
	io->cb = cb;
	io->data = data;
	io->node.data = io;

	io_set(io, flags);

#ifndef HAVE_SYS_EPOLL_H

	if(!splay_insert_node(&io_tree, &io->node)) {
		abort();
	}

#endif
}
#ifdef HAVE_MINGW
void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
	io->event = event;
	io_add(io, cb, data, -1, 0);
}
#endif
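/*
   Typical registration, as a sketch (on_readable and peer are illustrative
   names, not part of this API):

       static void on_readable(void *data, int flags) {
               struct peer *p = data;
               // ... read from p->fd ...
       }

       static io_t io;
       io_add(&io, on_readable, &peer, peer.fd, IO_READ);

   The io_t is stored by pointer in io_tree, so it must stay alive until
   io_del() is called.
*/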
void io_set(io_t *io, int flags) {
#ifdef HAVE_SYS_EPOLL_H

	if(!epollset) {
		epollset = event_epoll_init();
	}

#endif

	if(flags == io->flags) {
		return;
	}

	io->flags = flags;

	if(io->fd == -1) {
		return;
	}

#ifndef HAVE_MINGW
#ifdef HAVE_SYS_EPOLL_H
	/* Re-register from scratch: drop any existing registration first. */
	epoll_ctl(epollset, EPOLL_CTL_DEL, io->fd, NULL);

	struct epoll_event ev = {
		.events = 0,
		.data.ptr = io,
	};

	if(flags & IO_READ) {
		ev.events |= EPOLLIN;
	}

	if(flags & IO_WRITE) {
		ev.events |= EPOLLOUT;
	} else if(ev.events == 0) {
		/* Nothing to watch; bump the generation so dispatch loops notice. */
		io_tree.generation++;
		return;
	}

	if(epoll_ctl(epollset, EPOLL_CTL_ADD, io->fd, &ev) < 0) {
		perror("epoll_ctl_add");
	}

#else

	if(flags & IO_READ) {
		FD_SET(io->fd, &readfds);
	} else {
		FD_CLR(io->fd, &readfds);
	}

	if(flags & IO_WRITE) {
		FD_SET(io->fd, &writefds);
	} else {
		FD_CLR(io->fd, &writefds);
	}

#endif
#else
	long events = 0;

	if(flags & IO_WRITE) {
		events |= WRITE_EVENTS;
	}

	if(flags & IO_READ) {
		events |= READ_EVENTS;
	}

	if(WSAEventSelect(io->fd, io->event, events) != 0) {
		abort();
	}

#endif
}
void io_del(io_t *io) {
	if(!io->cb) {
		return;
	}

	io_set(io, 0);

#ifdef HAVE_MINGW

	if(io->fd != -1 && WSACloseEvent(io->event) == FALSE) {
		abort();
	}

	event_count--;
#endif

#ifndef HAVE_SYS_EPOLL_H
	splay_unlink_node(&io_tree, &io->node);
#endif

	io->cb = NULL;
}
void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
	timeout->cb = cb;
	timeout->data = data;
	timeout->node.data = timeout;

	timeout_set(timeout, tv);
}
void timeout_set(timeout_t *timeout, struct timeval *tv) {
	if(timerisset(&timeout->tv)) {
		splay_unlink_node(&timeout_tree, &timeout->node);
	}

	gettimeofday(&now, NULL);
	timeradd(&now, tv, &timeout->tv);

	if(!splay_insert_node(&timeout_tree, &timeout->node)) {
		abort();
	}
}
void timeout_del(timeout_t *timeout) {
	if(!timeout->cb) {
		return;
	}

	splay_unlink_node(&timeout_tree, &timeout->node);
	timeout->cb = NULL;
	timeout->tv = (struct timeval) {
		0, 0
	};
}
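/*
   A repeating timer, as a sketch (periodic_handler is an illustrative name).
   timeout_execute() below removes an expired timeout unless its callback
   re-arms it, so a periodic callback simply calls timeout_set() on itself:

       static void periodic_handler(void *data) {
               timeout_t *t = data;
               // ... periodic work ...
               timeout_set(t, &(struct timeval){1, 0});  // fire again in 1s
       }

       static timeout_t t;
       timeout_add(&t, periodic_handler, &t, &(struct timeval){1, 0});
*/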
#ifndef HAVE_MINGW

#ifndef NSIG
#define NSIG (_SIGMAX + 1)      /* For QNX */
#endif
static io_t signalio;
static int pipefd[2] = {-1, -1};
static signal_t *signal_handle[NSIG + 1] = {};
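/*
   Signals are delivered via the classic self-pipe trick: the asynchronous
   handler below only writes the signal number to a pipe, and
   signalio_handler() reads it back on the event loop thread, where it is
   safe to run arbitrary code. write() is one of the few async-signal-safe
   functions, which is why the handler does nothing else.
*/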
static void signal_handler(int signum) {
	unsigned char num = signum;

	/* If the pipe is full or broken there is nothing we can do here, so
	   the result of the write is deliberately ignored. */
	if(write(pipefd[1], &num, 1) != 1) {
		/* Nothing to do. */
	}
}
static void signalio_handler(void *data, int flags) {
	(void)data;
	(void)flags;
	unsigned char signum;

	if(read(pipefd[0], &signum, 1) != 1) {
		return;
	}

	signal_t *sig = signal_handle[signum];

	if(sig) {
		sig->cb(sig->data);
	}
}
static void pipe_init(void) {
	if(!pipe(pipefd)) {
		io_add(&signalio, signalio_handler, NULL, pipefd[0], IO_READ);
	}
}
void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum) {
	if(sig->cb) {
		return;
	}

	sig->cb = cb;
	sig->data = data;
	sig->signum = signum;

	if(pipefd[0] == -1) {
		pipe_init();
	}

	signal(signum, signal_handler);

	signal_handle[signum] = sig;
}
void signal_del(signal_t *sig) {
	if(!sig->cb) {
		return;
	}

	signal(sig->signum, SIG_DFL);

	signal_handle[sig->signum] = NULL;
	sig->cb = NULL;
}

#endif
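/*
   Fire all expired timeouts, then return the interval until the next
   deadline (stored in *diff), or NULL if no timeouts remain, meaning the
   caller may block indefinitely.
*/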
static struct timeval *timeout_execute(struct timeval *diff) {
	gettimeofday(&now, NULL);
	struct timeval *tv = NULL;

	while(timeout_tree.head) {
		timeout_t *timeout = timeout_tree.head->data;
		timersub(&timeout->tv, &now, diff);

		if(diff->tv_sec < 0) {
			timeout->cb(timeout->data);

			/* Delete the timeout only if the callback did not re-arm it. */
			if(timercmp(&timeout->tv, &now, <)) {
				timeout_del(timeout);
			}
		} else {
			tv = diff;
			break;
		}
	}

	return tv;
}
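/*
   Putting it together, a minimal consumer of this API might look like this
   (sketch; stop_handler is an illustrative name):

       static void stop_handler(void *data) {
               (void)data;
               event_exit();
       }

       int main(void) {
               signal_t sigint = {0};
               signal_add(&sigint, stop_handler, NULL, SIGINT);
               return event_loop() ? 0 : 1;
       }
*/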
bool event_loop(void) {
	running = true;

#ifndef HAVE_MINGW
	fd_set readable;
	fd_set writable;

#ifdef HAVE_SYS_EPOLL_H

	if(!epollset) {
		epollset = event_epoll_init();
	}

#endif

	while(running) {
		struct timeval diff;
		struct timeval *tv = timeout_execute(&diff);

#ifndef HAVE_SYS_EPOLL_H
		memcpy(&readable, &readfds, sizeof(readable));
		memcpy(&writable, &writefds, sizeof(writable));
#endif

#ifdef HAVE_SYS_EPOLL_H
		struct epoll_event events[EPOLL_MAX_EVENTS_PER_LOOP];

		/* A NULL tv means no timeout is pending; pass -1 so epoll_wait()
		   blocks until an fd becomes ready. */
		long timeout = tv ? (tv->tv_sec * 1000) + (tv->tv_usec / 1000) : -1;

		if(timeout > INT_MAX) {
			timeout = INT_MAX;
		}

		int n = epoll_wait(epollset, events, EPOLL_MAX_EVENTS_PER_LOOP, (int)timeout);
#else
		int maxfds = 0;

		if(io_tree.tail) {
			io_t *last = io_tree.tail->data;
			maxfds = last->fd + 1;
		}

		int n = select(maxfds, &readable, &writable, NULL, tv);
#endif

		if(n < 0) {
			if(sockwouldblock(sockerrno)) {
				continue;
			} else {
				return false;
			}
		}

		if(!n) {
			continue;
		}

		unsigned int curgen = io_tree.generation;
#ifdef HAVE_SYS_EPOLL_H

		for(int i = 0; i < n; i++) {
			io_t *io = events[i].data.ptr;

			if(events[i].events & EPOLLOUT && io->flags & IO_WRITE) {
				io->cb(io->data, IO_WRITE);
			}

			/* A callback may have added or removed io entries; if so, the
			   generation changed and this io may no longer be valid. */
			if(curgen != io_tree.generation) {
				break;
			}

			if(events[i].events & EPOLLIN && io->flags & IO_READ) {
				io->cb(io->data, IO_READ);
			}

			if(curgen != io_tree.generation) {
				break;
			}
		}

#else
		for splay_each(io_t, io, &io_tree) {
			if(FD_ISSET(io->fd, &writable)) {
				io->cb(io->data, IO_WRITE);
			} else if(FD_ISSET(io->fd, &readable)) {
				io->cb(io->data, IO_READ);
			}

			/*
			   There are scenarios in which the callback will remove another
			   io_t from the tree (e.g. closing a double connection). Since
			   splay_each does not support that, we need to exit the loop if
			   that happens. That's okay, since any remaining events will get
			   picked up by the next select() call.
			*/
			if(curgen != io_tree.generation) {
				break;
			}
		}

#endif
	}

#else

	while(running) {
		struct timeval diff;
		struct timeval *tv = timeout_execute(&diff);
		DWORD timeout_ms = tv ? (DWORD)(tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;

		if(!event_count) {
			Sleep(timeout_ms);
			continue;
		}
		/*
		   For some reason, Microsoft decided to make the FD_WRITE event
		   edge-triggered instead of level-triggered, which is the opposite
		   of what select() does. In practice, that means that once an
		   FD_WRITE event fires, it will never fire again until a send()
		   returns EWOULDBLOCK. Since the semantics of this event loop are
		   that write events are level-triggered (i.e. they keep firing as
		   long as the socket is writable), we need to emulate them by firing
		   the IO_WRITE callback of every io whose socket is still writable.

		   Note that technically FD_CLOSE has the same problem, but that is
		   okay because user code does not rely on this event being fired
		   again if ignored.
		*/
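		/*
		   The zero-byte send() below doubles as a cheap writability probe:
		   the expectation is that it returns 0 only while the socket buffer
		   can accept more data, and fails with EWOULDBLOCK otherwise.
		*/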
		unsigned int curgen = io_tree.generation;

		for splay_each(io_t, io, &io_tree) {
			if(io->flags & IO_WRITE && send(io->fd, NULL, 0, 0) == 0) {
				io->cb(io->data, IO_WRITE);

				if(curgen != io_tree.generation) {
					break;
				}
			}
		}
		if(event_count > WSA_MAXIMUM_WAIT_EVENTS) {
			WSASetLastError(WSA_INVALID_PARAMETER);
			return false;
		}

		WSAEVENT events[WSA_MAXIMUM_WAIT_EVENTS];
		io_t *io_map[WSA_MAXIMUM_WAIT_EVENTS];
		DWORD event_index = 0;
		for splay_each(io_t, io, &io_tree) {
			events[event_index] = io->event;
			io_map[event_index] = io;
			event_index++;
		}
		/*
		 * If the generation number changes due to event addition or
		 * removal by a callback, we restart the loop.
		 */
		curgen = io_tree.generation;
		for(DWORD event_offset = 0; event_offset < event_count;) {
			DWORD result = WSAWaitForMultipleEvents(event_count - event_offset, &events[event_offset], FALSE, timeout_ms, FALSE);

			if(result == WSA_WAIT_TIMEOUT) {
				break;
			}

			if(result < WSA_WAIT_EVENT_0 || result >= WSA_WAIT_EVENT_0 + event_count - event_offset) {
				return false;
			}

			/* Look up the io in the map by index. */
			event_index = result - WSA_WAIT_EVENT_0 + event_offset;
			io_t *io = io_map[event_index];

			if(io->fd == -1) {
				io->cb(io->data, 0);

				if(curgen != io_tree.generation) {
					break;
				}
			} else {
				WSANETWORKEVENTS network_events;

				if(WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) {
					return false;
				}

				if(network_events.lNetworkEvents & READ_EVENTS) {
					io->cb(io->data, IO_READ);

					if(curgen != io_tree.generation) {
						break;
					}
				}

				/*
				   The fd might be available for write too. However, if we
				   already fired the read callback, that callback might have
				   deleted the io (e.g. through terminate_connection()), so
				   we can't fire the write callback here. Instead, we loop
				   back and let the writable io loop above handle it.
				*/
			}
			/* Continue checking the rest of the events. */
			event_offset = event_index + 1;

			/* Just poll the next time through. */
			timeout_ms = 0;
		}
	}

#endif

	return true;
}
void event_exit(void) {
	running = false;
}