/*
    event.c -- I/O, timeout and signal event handling
    Copyright (C) 2012-2022 Guus Sliepen <guus@tinc-vpn.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "system.h"

#include <assert.h>

#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif

#include "event.h"
#include "net.h"
#include "utils.h"

struct timeval now;
static bool running;

#ifndef HAVE_WINDOWS
#ifdef HAVE_SYS_EPOLL_H
static int epollset = 0;
#else
static fd_set readfds;
static fd_set writefds;
#endif
#else
static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
static DWORD event_count = 0;
#endif
#ifdef HAVE_SYS_EPOLL_H
static inline int event_epoll_init(void) {
    /* NOTE: The 1024 limit is only used on ancient (pre-2.6.27) kernels.
       Decent kernels ignore this value, making it effectively unlimited.
       epoll_create1() might be better, but those kernels would not be
       supported in that case. */
    return epoll_create(1024);
}
#endif
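/*
   For comparison only (not used here, for the compatibility reason given
   above): on kernels that have it, the modern variant would be

       return epoll_create1(0);

   which takes a flags argument (e.g. EPOLL_CLOEXEC) instead of a size hint.
*/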
static int io_compare(const io_t *a, const io_t *b) {
#ifndef HAVE_WINDOWS
    return a->fd - b->fd;
#else

    if(a->event < b->event) {
        return -1;
    }

    if(a->event > b->event) {
        return 1;
    }

    return 0;
#endif
}
static int timeout_compare(const timeout_t *a, const timeout_t *b) {
    struct timeval diff;
    timersub(&a->tv, &b->tv, &diff);

    if(diff.tv_sec < 0) {
        return -1;
    }

    if(diff.tv_sec > 0) {
        return 1;
    }

    if(diff.tv_usec < 0) {
        return -1;
    }

    if(diff.tv_usec > 0) {
        return 1;
    }

    /* Distinct timeouts with identical expiry must not compare equal,
       so fall back to comparing addresses. */
    if(a < b) {
        return -1;
    }

    if(a > b) {
        return 1;
    }

    return 0;
}
static splay_tree_t io_tree = {.compare = (splay_compare_t)io_compare};
static splay_tree_t timeout_tree = {.compare = (splay_compare_t)timeout_compare};
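/* Both trees are intrusive: each io_t and timeout_t embeds its own splay node.
   The io_tree is ordered by fd (by WSAEVENT handle on Windows), so its tail
   holds the highest fd, which the select() path uses to compute maxfds; the
   timeout_tree is ordered by expiry, so its head is always the next timeout
   due. On epoll builds the io_tree is not populated, and only its generation
   counter is used to detect modification during callbacks. */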
void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
    if(io->cb) {
        return;
    }

    io->fd = fd;

#ifdef HAVE_WINDOWS

    if(io->fd != -1) {
        io->event = WSACreateEvent();

        if(io->event == WSA_INVALID_EVENT) {
            abort();
        }
    }

    event_count++;
#endif

    io->cb = cb;
    io->data = data;
    io->node.data = io;

    io_set(io, flags);

#ifndef HAVE_SYS_EPOLL_H

    if(!splay_insert_node(&io_tree, &io->node)) {
        abort();
    }

#endif
}
#ifdef HAVE_WINDOWS
void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
    io->event = event;
    io_add(io, cb, data, -1, 0);
}
#endif
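/*
   Usage sketch (illustrative; `sock` and `on_readable` are hypothetical):

   static io_t sock_io;

   static void on_readable(void *data, int flags) {
       // flags is the readiness event that fired: IO_READ or IO_WRITE.
   }

   io_add(&sock_io, on_readable, NULL, sock, IO_READ);
   io_set(&sock_io, IO_READ | IO_WRITE);  // later: also wait for writability
   io_del(&sock_io);                      // when done with the socket
*/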
void io_set(io_t *io, int flags) {
#ifdef HAVE_SYS_EPOLL_H

    if(!epollset) {
        epollset = event_epoll_init();
    }

#endif

    if(flags == io->flags) {
        return;
    }

    io->flags = flags;

    if(io->fd == -1) {
        return;
    }

#ifndef HAVE_WINDOWS
#ifdef HAVE_SYS_EPOLL_H
    epoll_ctl(epollset, EPOLL_CTL_DEL, io->fd, NULL);

    struct epoll_event ev = {
        .events = 0,
        .data.ptr = io,
    };

    if(flags & IO_READ) {
        ev.events |= EPOLLIN;
    }

    if(flags & IO_WRITE) {
        ev.events |= EPOLLOUT;
    } else if(ev.events == 0) {
        io_tree.generation++;
        return;
    }

    if(epoll_ctl(epollset, EPOLL_CTL_ADD, io->fd, &ev) < 0) {
        perror("epoll_ctl_add");
    }

#else

    if(flags & IO_READ) {
        FD_SET(io->fd, &readfds);
    } else {
        FD_CLR(io->fd, &readfds);
    }

    if(flags & IO_WRITE) {
        FD_SET(io->fd, &writefds);
    } else {
        FD_CLR(io->fd, &writefds);
    }

#endif
#else
    long events = 0;

    if(flags & IO_WRITE) {
        events |= WRITE_EVENTS;
    }

    if(flags & IO_READ) {
        events |= READ_EVENTS;
    }

    if(WSAEventSelect(io->fd, io->event, events) != 0) {
        abort();
    }

#endif
}
void io_del(io_t *io) {
    if(!io->cb) {
        return;
    }

    io_set(io, 0);

#ifdef HAVE_WINDOWS

    if(io->fd != -1 && WSACloseEvent(io->event) == FALSE) {
        abort();
    }

    event_count--;
#endif

#ifndef HAVE_SYS_EPOLL_H
    splay_unlink_node(&io_tree, &io->node);
#endif

    io->cb = NULL;
}
void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
    timeout->cb = cb;
    timeout->data = data;
    timeout->node.data = timeout;

    timeout_set(timeout, tv);
}
void timeout_set(timeout_t *timeout, struct timeval *tv) {
    if(timerisset(&timeout->tv)) {
        splay_unlink_node(&timeout_tree, &timeout->node);
    }

    if(!now.tv_sec) {
        gettimeofday(&now, NULL);
    }

    timeradd(&now, tv, &timeout->tv);

    if(!splay_insert_node(&timeout_tree, &timeout->node)) {
        abort();
    }
}
void timeout_del(timeout_t *timeout) {
    if(!timeout->cb) {
        return;
    }

    splay_unlink_node(&timeout_tree, &timeout->node);
    timeout->cb = NULL;
    timeout->tv = (struct timeval) {
        0, 0
    };
}
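/*
   Usage sketch (illustrative; `periodic_cb` and the one-second interval are
   hypothetical). A timeout fires once; timeout_execute() below deletes it
   unless the callback pushes tv into the future again, so a periodic timer
   simply re-arms itself:

   static timeout_t periodic;

   static void periodic_cb(void *data) {
       timeout_set(&periodic, &(struct timeval){1, 0});
   }

   timeout_add(&periodic, periodic_cb, NULL, &(struct timeval){1, 0});
*/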
#ifndef HAVE_WINDOWS

#ifndef NSIG
# define NSIG (_SIGMAX + 1)      /* For QNX */
#endif

static io_t signalio;
static int pipefd[2] = {-1, -1};
static signal_t *signal_handle[NSIG + 1] = {NULL};
static void signal_handler(int signum) {
    unsigned char num = signum;

    if(write(pipefd[1], &num, 1) != 1) {
        // Pipe full or broken, nothing we can do about it.
    }
}

static void signalio_handler(void *data, int flags) {
    (void)data;
    (void)flags;

    unsigned char signum;

    if(read(pipefd[0], &signum, 1) != 1) {
        return;
    }

    signal_t *sig = signal_handle[signum];

    if(sig) {
        sig->cb(sig->data);
    }
}
static void pipe_init(void) {
    if(!pipe(pipefd)) {
        io_add(&signalio, signalio_handler, NULL, pipefd[0], IO_READ);
    }
}
void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum) {
    if(sig->cb) {
        return;
    }

    sig->cb = cb;
    sig->data = data;
    sig->signum = signum;

    if(pipefd[0] == -1) {
        pipe_init();
    }

    signal(signum, signal_handler);

    signal_handle[signum] = sig;
}
void signal_del(signal_t *sig) {
    if(!sig->cb) {
        return;
    }

    signal(sig->signum, SIG_DFL);

    signal_handle[sig->signum] = NULL;
    sig->cb = NULL;
}

#endif
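/*
   Usage sketch (illustrative; `reload_cb` is hypothetical). The self-pipe
   above turns asynchronous signals into ordinary I/O events, so the callback
   runs inside the event loop rather than in signal context:

   static signal_t sighup;

   static void reload_cb(void *data) {
       // Safe to do real work here; this is not a signal handler.
   }

   signal_add(&sighup, reload_cb, NULL, SIGHUP);
*/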
static struct timeval *timeout_execute(struct timeval *diff) {
    gettimeofday(&now, NULL);
    struct timeval *tv = NULL;

    while(timeout_tree.head) {
        timeout_t *timeout = timeout_tree.head->data;
        timersub(&timeout->tv, &now, diff);

        if(diff->tv_sec < 0) {
            timeout->cb(timeout->data);

            /* If the callback did not re-arm the timeout, delete it. */
            if(timercmp(&timeout->tv, &now, <)) {
                timeout_del(timeout);
            }
        } else {
            tv = diff;
            break;
        }
    }

    return tv;
}
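/*
   timeout_execute() fires all expired timeouts and returns the time remaining
   until the next pending one (pointing into the caller's diff), or NULL when
   no timeouts are pending. event_loop() below turns that into the wait
   timeout for epoll_wait()/select()/WSAWaitForMultipleEvents().
*/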
bool event_loop(void) {
    running = true;

#ifndef HAVE_WINDOWS

#ifdef HAVE_SYS_EPOLL_H

    if(!epollset) {
        epollset = event_epoll_init();
    }

#else
    fd_set readable;
    fd_set writable;
#endif

    while(running) {
        struct timeval diff;
        struct timeval *tv = timeout_execute(&diff);

#ifndef HAVE_SYS_EPOLL_H
        memcpy(&readable, &readfds, sizeof(readable));
        memcpy(&writable, &writefds, sizeof(writable));
#endif

#ifdef HAVE_SYS_EPOLL_H
        struct epoll_event events[EPOLL_MAX_EVENTS_PER_LOOP];

        /* tv is NULL when no timeouts are pending; -1 makes epoll_wait()
           block indefinitely, matching select()'s NULL timeout. */
        long timeout = tv ? (tv->tv_sec * 1000) + (tv->tv_usec / 1000) : -1;

        if(timeout > INT_MAX) {
            timeout = INT_MAX;
        }

        int n = epoll_wait(epollset, events, EPOLL_MAX_EVENTS_PER_LOOP, (int)timeout);
#else
        int maxfds = 0;

        if(io_tree.tail) {
            io_t *last = io_tree.tail->data;
            maxfds = last->fd + 1;
        }

        int n = select(maxfds, &readable, &writable, NULL, tv);
#endif

        if(n < 0) {
            if(sockwouldblock(sockerrno)) {
                continue;
            } else {
                return false;
            }
        }

        if(!n) {
            continue;
        }
        unsigned int curgen = io_tree.generation;

#ifdef HAVE_SYS_EPOLL_H

        for(int i = 0; i < n; i++) {
            io_t *io = events[i].data.ptr;

            if(events[i].events & EPOLLOUT && io->flags & IO_WRITE) {
                io->cb(io->data, IO_WRITE);
            }

            if(curgen != io_tree.generation) {
                break;
            }

            if(events[i].events & EPOLLIN && io->flags & IO_READ) {
                io->cb(io->data, IO_READ);
            }

            if(curgen != io_tree.generation) {
                break;
            }
        }

#else
        for splay_each(io_t, io, &io_tree) {
            if(FD_ISSET(io->fd, &writable)) {
                io->cb(io->data, IO_WRITE);
            } else if(FD_ISSET(io->fd, &readable)) {
                io->cb(io->data, IO_READ);
            } else {
                continue;
            }

            /*
               There are scenarios in which the callback will remove another io_t from the tree
               (e.g. closing a double connection). Since splay_each does not support that, we
               need to exit the loop if that happens. That's okay, since any remaining events will
               get picked up by the next select() call.
            */
            if(curgen != io_tree.generation) {
                break;
            }
        }

#endif
    }

#else
    assert(WSA_WAIT_EVENT_0 == 0);

    while(running) {
        struct timeval diff;
        struct timeval *tv = timeout_execute(&diff);
        DWORD timeout_ms = tv ? (DWORD)(tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;

        if(!event_count) {
            Sleep(timeout_ms);
            continue;
        }
        /*
           For some reason, Microsoft decided to make the FD_WRITE event edge-triggered instead of level-triggered,
           which is the opposite of what select() does. In practice, that means that if an FD_WRITE event triggers,
           it will never trigger again until a send() returns EWOULDBLOCK. Since the semantics of this event loop
           are that write events are level-triggered (i.e. they continue firing until the socket is full), we need
           to emulate those semantics by making sure we fire each IO_WRITE that is still writeable.

           Note that technically FD_CLOSE has the same problem, but it's okay because user code does not rely on
           this event being fired again if ignored.
        */
        unsigned int curgen = io_tree.generation;

        for splay_each(io_t, io, &io_tree) {
            if(io->flags & IO_WRITE && send(io->fd, NULL, 0, 0) == 0) {
                io->cb(io->data, IO_WRITE);

                if(curgen != io_tree.generation) {
                    break;
                }
            }
        }
        if(event_count > WSA_MAXIMUM_WAIT_EVENTS) {
            WSASetLastError(WSA_INVALID_PARAMETER);
            return false;
        }

        WSAEVENT events[WSA_MAXIMUM_WAIT_EVENTS];
        io_t *io_map[WSA_MAXIMUM_WAIT_EVENTS];
        DWORD event_index = 0;

        for splay_each(io_t, io, &io_tree) {
            events[event_index] = io->event;
            io_map[event_index] = io;
            event_index++;
        }

        /*
         * If the generation number changes due to event addition
         * or removal by a callback, we restart the loop.
         */
        curgen = io_tree.generation;
        for(DWORD event_offset = 0; event_offset < event_count;) {
            DWORD result = WSAWaitForMultipleEvents(event_count - event_offset, &events[event_offset], FALSE, timeout_ms, FALSE);

            if(result == WSA_WAIT_TIMEOUT) {
                break;
            }

            if(result >= event_count - event_offset) {
                return false;
            }

            /* Look up io in the map by index. */
            event_index = result + event_offset;
            io_t *io = io_map[event_index];

            if(io->fd == -1) {
                io->cb(io->data, 0);

                if(curgen != io_tree.generation) {
                    break;
                }
            } else {
                WSANETWORKEVENTS network_events;

                if(WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) {
                    return false;
                }

                if(network_events.lNetworkEvents & READ_EVENTS) {
                    io->cb(io->data, IO_READ);

                    if(curgen != io_tree.generation) {
                        break;
                    }
                }

                /*
                   The fd might be available for write too. However, if we already fired the read callback, that
                   callback might have deleted the io (e.g. through terminate_connection()), so we can't fire the
                   write callback here. Instead, we loop back and let the writable io loop above handle it.
                */
            }

            /* Continue checking the rest of the events. */
            event_offset = event_index + 1;
            /* Just poll the next time through. */
            timeout_ms = 0;
        }
    }

#endif

    return true;
}

void event_exit(void) {
    running = false;
}
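/*
   Minimal usage sketch (illustrative; error handling and handler registration
   elided):

   int main(void) {
       // ... io_add()/timeout_add()/signal_add() calls go here ...

       if(!event_loop()) {
           return 1;  // the underlying poll mechanism failed
       }

       return 0;      // event_exit() was called from a callback
   }
*/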