/*
    event.c -- I/O, timeout and signal event handling
    Copyright (C) 2012-2022 Guus Sliepen <guus@tinc-vpn.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif

/* epoll backend: one epoll instance shared by the whole event loop. */
#ifdef HAVE_SYS_EPOLL_H
static int epollset = 0;

/* select() fallback (no epoll): fd sets maintained by io_set(). */
static fd_set readfds;
static fd_set writefds;

/* Windows backend: WinSock network event masks corresponding to IO_READ and IO_WRITE. */
static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
static DWORD event_count = 0;

#ifdef HAVE_SYS_EPOLL_H
static inline int event_epoll_init(void) {
        /* NOTE: 1024 limit is only used on ancient (pre 2.6.27) kernels.
           Decent kernels will ignore this value, making it unlimited.
           epoll_create1 might be better, but these kernels would not be
           supported in that case. */
        return epoll_create(1024);
}
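
/* Sketch, not part of the original file: on kernels that provide epoll_create1(),
   the size hint is unnecessary and close-on-exec can be set atomically. The
   HAVE_EPOLL_CREATE1 macro used here is a hypothetical configure check, shown only
   for comparison with the epoll_create(1024) call above. */
static inline int event_epoll_init_sketch(void) {
#ifdef HAVE_EPOLL_CREATE1
        /* The size argument is gone; pass EPOLL_CLOEXEC as the flags instead. */
        return epoll_create1(EPOLL_CLOEXEC);
#else
        /* Same fallback as the real implementation above. */
        return epoll_create(1024);
#endif
}
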
/* Comparators that order the io and timeout splay trees. */
static int io_compare(const io_t *a, const io_t *b) {
        if(a->event < b->event) {

        if(a->event > b->event) {

static int timeout_compare(const timeout_t *a, const timeout_t *b) {
        timersub(&a->tv, &b->tv, &diff);

        if(diff.tv_usec < 0) {

        if(diff.tv_usec > 0) {

static splay_tree_t io_tree = {.compare = (splay_compare_t)io_compare};
static splay_tree_t timeout_tree = {.compare = (splay_compare_t)timeout_compare};

/* Register a file descriptor (or Windows event) with the event loop. */
void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
        io->event = WSACreateEvent();

        if(io->event == WSA_INVALID_EVENT) {

#ifndef HAVE_SYS_EPOLL_H

        if(!splay_insert_node(&io_tree, &io->node)) {

/* Register an existing WSAEVENT (Windows only); no fd is associated with it. */
void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
        io_add(io, cb, data, -1, 0);
}
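
/* Usage sketch, not part of the original file: a caller keeps an io_t next to its
   own state and registers its socket for read events. handle_socket_read() and the
   parameters are illustrative names only. */
static void handle_socket_read(void *data, int flags) {
        (void)flags;
        /* ... read from the socket belonging to `data` and process it ... */
        (void)data;
}

static void io_add_usage_sketch(int sockfd, void *connection_state) {
        static io_t io;

        /* Invoke handle_socket_read(connection_state, IO_READ) whenever sockfd is readable. */
        io_add(&io, handle_socket_read, connection_state, sockfd, IO_READ);
}
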
/* (Re)register the events we want for this fd with the active backend. */
void io_set(io_t *io, int flags) {
#ifdef HAVE_SYS_EPOLL_H
        if(!epollset) {
                epollset = event_epoll_init();
        }
#endif

        if(flags == io->flags) {
                return;
        }

#ifdef HAVE_SYS_EPOLL_H
        epoll_ctl(epollset, EPOLL_CTL_DEL, io->fd, NULL);

        struct epoll_event ev = {
                .events = 0,
                .data.ptr = io,
        };

        if(flags & IO_READ) {
                ev.events |= EPOLLIN;
        }

        if(flags & IO_WRITE) {
                ev.events |= EPOLLOUT;
        } else if(ev.events == 0) {
                io_tree.generation++;
        }

        if(epoll_ctl(epollset, EPOLL_CTL_ADD, io->fd, &ev) < 0) {
                perror("epoll_ctl_add");
        }
#else
        if(flags & IO_READ) {
                FD_SET(io->fd, &readfds);
        } else {
                FD_CLR(io->fd, &readfds);
        }

        if(flags & IO_WRITE) {
                FD_SET(io->fd, &writefds);
        } else {
                FD_CLR(io->fd, &writefds);
        }
#endif

        /* Windows: map the flags onto WinSock network event masks. */
        if(flags & IO_WRITE) {
                events |= WRITE_EVENTS;
        }

        if(flags & IO_READ) {
                events |= READ_EVENTS;
        }

        if(WSAEventSelect(io->fd, io->event, events) != 0) {
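
/* Usage sketch, not part of the original file: a writer typically enables IO_WRITE
   only while it has data queued, and drops it again once the queue drains, so the
   write callback stops firing:

        io_set(&io, IO_READ | IO_WRITE);   // output pending, wake us when writable
        ...
        io_set(&io, IO_READ);              // queue empty, stop the write callbacks
*/
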
/* Unregister an fd from the event loop. */
void io_del(io_t *io) {
        if(io->fd != -1 && WSACloseEvent(io->event) == FALSE) {

#ifndef HAVE_SYS_EPOLL_H
        splay_unlink_node(&io_tree, &io->node);

/* Register a timeout; the callback fires once the given interval has elapsed. */
void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
        timeout->cb = cb;
        timeout->data = data;
        timeout->node.data = timeout;

        timeout_set(timeout, tv);
}
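
/* Usage sketch, not part of the original file: a periodic task can re-arm its own
   timeout from the callback. periodic_cb and the one-second interval are
   illustrative only. */
static void periodic_cb(void *data) {
        timeout_t *t = data;
        struct timeval interval = {1, 0};

        /* ... do the periodic work ... */

        timeout_set(t, &interval);      /* re-arm for the next round */
}

static void timeout_usage_sketch(void) {
        static timeout_t t;
        struct timeval interval = {1, 0};

        timeout_add(&t, periodic_cb, &t, &interval);
}
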
/* (Re)arm a timeout: convert the relative interval into an absolute expiry time. */
void timeout_set(timeout_t *timeout, struct timeval *tv) {
        if(timerisset(&timeout->tv)) {
                splay_unlink_node(&timeout_tree, &timeout->node);
        }

        gettimeofday(&now, NULL);

        timeradd(&now, tv, &timeout->tv);

        if(!splay_insert_node(&timeout_tree, &timeout->node)) {

/* Cancel a pending timeout and mark it as unset. */
void timeout_del(timeout_t *timeout) {
        splay_unlink_node(&timeout_tree, &timeout->node);

        timeout->tv = (struct timeval) {

# define NSIG (_SIGMAX + 1)     /* For QNX */

/* Self-pipe trick: the asynchronous signal handler only writes the signal number
   to a pipe; the read end is watched by the event loop, so the real handler runs
   in normal context from event_loop(). */
static io_t signalio;
static int pipefd[2] = {-1, -1};
static signal_t *signal_handle[NSIG + 1] = {NULL};

static void signal_handler(int signum) {
        unsigned char num = signum;

        if(write(pipefd[1], &num, 1) != 1) {
                // Pipe full or broken, nothing we can do about it.
        }
}

static void signalio_handler(void *data, int flags) {
        unsigned char signum;

        if(read(pipefd[0], &signum, 1) != 1) {

        signal_t *sig = signal_handle[signum];

static void pipe_init(void) {
        io_add(&signalio, signalio_handler, NULL, pipefd[0], IO_READ);

/* Install a handler for signum; the callback is invoked from the event loop. */
void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum) {
        sig->signum = signum;

        if(pipefd[0] == -1) {
                pipe_init();
        }

        signal(signum, signal_handler);
        signal_handle[signum] = sig;
}
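
/* Usage sketch, not part of the original file: a SIGINT handler that shuts the
   event loop down cleanly. Because delivery goes through the pipe and
   signalio_handler(), sigint_cb() runs inside event_loop() rather than in
   asynchronous signal context, so calling event_exit() from it is safe. */
static void sigint_cb(void *data) {
        (void)data;
        event_exit();
}

static void signal_usage_sketch(void) {
        static signal_t sigint;

        signal_add(&sigint, sigint_cb, NULL, SIGINT);
}
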
void signal_del(signal_t *sig) {
        signal(sig->signum, SIG_DFL);

        signal_handle[sig->signum] = NULL;

/* Fire all expired timeouts; return how long the loop may block until the next
   pending timeout, or NULL if no timeouts are scheduled. */
static struct timeval *timeout_execute(struct timeval *diff) {
        gettimeofday(&now, NULL);
        struct timeval *tv = NULL;

        while(timeout_tree.head) {
                timeout_t *timeout = timeout_tree.head->data;
                timersub(&timeout->tv, &now, diff);

                if(diff->tv_sec < 0) {
                        timeout->cb(timeout->data);

                        if(timercmp(&timeout->tv, &now, <)) {
                                timeout_del(timeout);

bool event_loop(void) {
#ifdef HAVE_SYS_EPOLL_H
        if(!epollset) {
                epollset = event_epoll_init();
        }
#endif
        struct timeval diff;
        struct timeval *tv = timeout_execute(&diff);
#ifndef HAVE_SYS_EPOLL_H
        /* select() needs a fresh copy of the fd sets on every iteration. */
        memcpy(&readable, &readfds, sizeof(readable));
        memcpy(&writable, &writefds, sizeof(writable));
#endif

#ifdef HAVE_SYS_EPOLL_H
        struct epoll_event events[EPOLL_MAX_EVENTS_PER_LOOP];
        long timeout = (tv->tv_sec * 1000) + (tv->tv_usec / 1000);

        if(timeout > INT_MAX) {
                timeout = INT_MAX;
        }

        int n = epoll_wait(epollset, events, EPOLL_MAX_EVENTS_PER_LOOP, (int)timeout);
#else
        int maxfds = 0;

        if(io_tree.tail) {
                io_t *last = io_tree.tail->data;
                maxfds = last->fd + 1;
        }

        int n = select(maxfds, &readable, &writable, NULL, tv);
#endif

        if(n < 0) {
                if(sockwouldblock(sockerrno)) {

        unsigned int curgen = io_tree.generation;

#ifdef HAVE_SYS_EPOLL_H
        for(int i = 0; i < n; i++) {
                io_t *io = events[i].data.ptr;

                if(events[i].events & EPOLLOUT && io->flags & IO_WRITE) {
                        io->cb(io->data, IO_WRITE);
                }

                if(curgen != io_tree.generation) {
                        break;
                }

                if(events[i].events & EPOLLIN && io->flags & IO_READ) {
                        io->cb(io->data, IO_READ);
                }

                if(curgen != io_tree.generation) {
                        break;
                }
        }
#else
        for splay_each(io_t, io, &io_tree) {
                if(FD_ISSET(io->fd, &writable)) {
                        io->cb(io->data, IO_WRITE);
                } else if(FD_ISSET(io->fd, &readable)) {
                        io->cb(io->data, IO_READ);
                }

                /*
                   There are scenarios in which the callback will remove another io_t from the tree
                   (e.g. closing a double connection). Since splay_each does not support that, we
                   need to exit the loop if that happens. That's okay, since any remaining events
                   will get picked up by the next select() call.
                */
                if(curgen != io_tree.generation) {
                        break;

        assert(WSA_WAIT_EVENT_0 == 0);

        struct timeval *tv = timeout_execute(&diff);
        DWORD timeout_ms = tv ? (DWORD)(tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;

        /*
           For some reason, Microsoft decided to make the FD_WRITE event edge-triggered instead of level-triggered,
           which is the opposite of what select() does. In practice, that means that if an FD_WRITE event triggers,
           it will never trigger again until a send() returns EWOULDBLOCK. Since the semantics of this event loop
           are that write events are level-triggered (i.e. they continue firing until the socket is full), we need
           to emulate these semantics by making sure we fire each IO_WRITE whose socket is still writable.

           Note that technically FD_CLOSE has the same problem, but it's okay because user code does not rely on
           this event being fired again if ignored.
        */
        unsigned int curgen = io_tree.generation;

        for splay_each(io_t, io, &io_tree) {
                if(io->flags & IO_WRITE && send(io->fd, NULL, 0, 0) == 0) {
                        io->cb(io->data, IO_WRITE);
                }

                if(curgen != io_tree.generation) {
                        break;
                }
        }

        if(event_count > WSA_MAXIMUM_WAIT_EVENTS) {
                WSASetLastError(WSA_INVALID_PARAMETER);
                return false;
        }

        WSAEVENT events[WSA_MAXIMUM_WAIT_EVENTS];
        io_t *io_map[WSA_MAXIMUM_WAIT_EVENTS];
        DWORD event_index = 0;

        /* Build the array of events to wait for, plus a parallel map back to each io_t. */
        for splay_each(io_t, io, &io_tree) {
                events[event_index] = io->event;
                io_map[event_index] = io;
                event_index++;
        }

        /*
         * If the generation number changes due to event addition
         * or removal by a callback we restart the loop.
         */
        curgen = io_tree.generation;

        for(DWORD event_offset = 0; event_offset < event_count;) {
                DWORD result = WSAWaitForMultipleEvents(event_count - event_offset, &events[event_offset], FALSE, timeout_ms, FALSE);

                if(result == WSA_WAIT_TIMEOUT) {
                        break;
                }

                if(result >= event_count - event_offset) {

                /* Look up io in the map by index. */
                event_index = result + event_offset;
                io_t *io = io_map[event_index];

                if(curgen != io_tree.generation) {
                        break;
                }

                WSANETWORKEVENTS network_events;

                if(WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) {

                if(network_events.lNetworkEvents & READ_EVENTS) {
                        io->cb(io->data, IO_READ);

                        if(curgen != io_tree.generation) {
                                break;
                        }
                }

                /*
                   The fd might be available for write too. However, if we already fired the read callback, that
                   callback might have deleted the io (e.g. through terminate_connection()), so we can't fire the
                   write callback here. Instead, we loop back and let the writable io loop above handle it.
                */

                /* Continue checking the rest of the events. */
                event_offset = event_index + 1;

                /* Just poll the next time through. */
                timeout_ms = 0;
        }

void event_exit(void) {
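        /* event_exit() asks a running event_loop() to return after its current
           iteration. A minimal sketch of how the pieces above fit together
           (all names are the illustrative ones from the earlier sketches):

                io_add(&io, handle_socket_read, conn, sockfd, IO_READ);
                timeout_add(&t, periodic_cb, &t, &interval);
                signal_add(&sigint, sigint_cb, NULL, SIGINT);

                event_loop();   // runs until sigint_cb() calls event_exit()
        */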