diff --git a/src/api.c b/src/api.c index 3c7c498..2b499c8 100644 --- a/src/api.c +++ b/src/api.c @@ -9,10 +9,10 @@ #include "wepoll.h" #include "win.h" -static ts_tree_t epoll__handle_tree; +static ts_tree_t api__port_tree; -int epoll_global_init(void) { - ts_tree_init(&epoll__handle_tree); +int api_global_init(void) { + ts_tree_init(&api__port_tree); return 0; } @@ -28,8 +28,8 @@ static HANDLE epoll__create(void) { if (port_state == NULL) return NULL; - tree_node = port_state_to_handle_tree_node(port_state); - if (ts_tree_add(&epoll__handle_tree, tree_node, (uintptr_t) ephnd) < 0) { + tree_node = port_state_to_tree_node(port_state); + if (ts_tree_add(&api__port_tree, tree_node, (uintptr_t) ephnd) < 0) { /* This should never happen. */ port_delete(port_state); return_set_error(NULL, ERROR_ALREADY_EXISTS); @@ -59,13 +59,13 @@ int epoll_close(HANDLE ephnd) { if (init() < 0) return -1; - tree_node = ts_tree_del_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); + tree_node = ts_tree_del_and_ref(&api__port_tree, (uintptr_t) ephnd); if (tree_node == NULL) { err_set_win_error(ERROR_INVALID_PARAMETER); goto err; } - port_state = port_state_from_handle_tree_node(tree_node); + port_state = port_state_from_tree_node(tree_node); port_close(port_state); ts_tree_node_unref_and_destroy(tree_node); @@ -85,13 +85,13 @@ int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event* ev) { if (init() < 0) return -1; - tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); + tree_node = ts_tree_find_and_ref(&api__port_tree, (uintptr_t) ephnd); if (tree_node == NULL) { err_set_win_error(ERROR_INVALID_PARAMETER); goto err; } - port_state = port_state_from_handle_tree_node(tree_node); + port_state = port_state_from_tree_node(tree_node); r = port_ctl(port_state, op, sock, ev); ts_tree_node_unref(tree_node); @@ -123,13 +123,13 @@ int epoll_wait(HANDLE ephnd, if (init() < 0) return -1; - tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); + 
tree_node = ts_tree_find_and_ref(&api__port_tree, (uintptr_t) ephnd); if (tree_node == NULL) { err_set_win_error(ERROR_INVALID_PARAMETER); goto err; } - port_state = port_state_from_handle_tree_node(tree_node); + port_state = port_state_from_tree_node(tree_node); num_events = port_wait(port_state, events, maxevents, timeout); ts_tree_node_unref(tree_node); diff --git a/src/api.h b/src/api.h index 9bda65e..678ddb0 100644 --- a/src/api.h +++ b/src/api.h @@ -2,7 +2,12 @@ #define WEPOLL_API_H_ #include "config.h" +#include "wepoll.h" -WEPOLL_INTERNAL int epoll_global_init(void); +#define API_VALID_EPOLL_EVENTS \ + (EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | \ + EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLRDHUP) + +WEPOLL_INTERNAL int api_global_init(void); #endif /* WEPOLL_API_H_ */ diff --git a/src/init.c b/src/init.c index 04f72bc..1e19515 100644 --- a/src/init.c +++ b/src/init.c @@ -21,7 +21,7 @@ static BOOL CALLBACK init__once_callback(INIT_ONCE* once, /* N.b. that initialization order matters here. 
*/ if (ws_global_init() < 0 || nt_global_init() < 0 || - reflock_global_init() < 0 || epoll_global_init() < 0) + reflock_global_init() < 0 || api_global_init() < 0) return FALSE; init__done = true; diff --git a/src/nt.h b/src/nt.h index 4e66548..c411a00 100644 --- a/src/nt.h +++ b/src/nt.h @@ -34,6 +34,12 @@ typedef struct _IO_STATUS_BLOCK { ULONG_PTR Information; } IO_STATUS_BLOCK, *PIO_STATUS_BLOCK; +typedef struct _FILE_IO_COMPLETION_INFORMATION { + PVOID KeyContext; + PVOID ApcContext; + IO_STATUS_BLOCK IoStatusBlock; +} FILE_IO_COMPLETION_INFORMATION, *PFILE_IO_COMPLETION_INFORMATION; + typedef VOID(NTAPI* PIO_APC_ROUTINE)(PVOID ApcContext, PIO_STATUS_BLOCK IoStatusBlock, ULONG Reserved); @@ -121,6 +127,15 @@ typedef struct _OBJECT_ATTRIBUTES { BOOLEAN Alertable, \ PLARGE_INTEGER Timeout)) \ \ + X(NTSTATUS, \ + NTAPI, \ + NtSetIoCompletion, \ + (HANDLE IoCompletionHandle, \ + PVOID KeyContext, \ + PVOID ApcContext, \ + NTSTATUS IoStatus, \ + ULONG_PTR IoStatusInformation)) \ + \ X(NTSTATUS, \ NTAPI, \ NtWaitForKeyedEvent, \ diff --git a/src/port.c b/src/port.c index 58dccc1..6a63fcb 100644 --- a/src/port.c +++ b/src/port.c @@ -417,10 +417,10 @@ queue_t* port_get_poll_group_queue(port_state_t* port_state) { return &port_state->poll_group_queue; } -port_state_t* port_state_from_handle_tree_node(ts_tree_node_t* tree_node) { +port_state_t* port_state_from_tree_node(ts_tree_node_t* tree_node) { return container_of(tree_node, port_state_t, handle_tree_node); } -ts_tree_node_t* port_state_to_handle_tree_node(port_state_t* port_state) { +ts_tree_node_t* port_state_to_tree_node(port_state_t* port_state) { return &port_state->handle_tree_node; } diff --git a/src/port.h b/src/port.h index 244de13..212087b 100644 --- a/src/port.h +++ b/src/port.h @@ -45,9 +45,9 @@ WEPOLL_INTERNAL void port_remove_deleted_socket(port_state_t* port_state, WEPOLL_INTERNAL HANDLE port_get_iocp_handle(port_state_t* port_state); WEPOLL_INTERNAL queue_t* port_get_poll_group_queue(port_state_t* 
port_state); -WEPOLL_INTERNAL port_state_t* port_state_from_handle_tree_node( +WEPOLL_INTERNAL port_state_t* port_state_from_tree_node( ts_tree_node_t* tree_node); -WEPOLL_INTERNAL ts_tree_node_t* port_state_to_handle_tree_node( +WEPOLL_INTERNAL ts_tree_node_t* port_state_to_tree_node( port_state_t* port_state); #endif /* WEPOLL_PORT_H_ */ diff --git a/src/sock.c b/src/sock.c index c578d79..df54c73 100644 --- a/src/sock.c +++ b/src/sock.c @@ -5,6 +5,7 @@ #include #include "afd.h" +#include "api.h" #include "error.h" #include "poll-group.h" #include "port.h" @@ -15,10 +16,6 @@ #include "wepoll.h" #include "ws.h" -#define SOCK__KNOWN_EPOLL_EVENTS \ - (EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | \ - EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLRDHUP) - typedef enum sock__poll_status { SOCK__POLL_IDLE = 0, SOCK__POLL_PENDING, @@ -152,7 +149,7 @@ int sock_set_event(port_state_t* port_state, sock_state->user_events = events; sock_state->user_data = ev->data; - if ((events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) != 0) + if ((events & API_VALID_EPOLL_EVENTS & ~sock_state->pending_events) != 0) port_request_socket_update(port_state, sock_state); return 0; @@ -204,7 +201,7 @@ int sock_update(port_state_t* port_state, sock_state_t* sock_state) { assert(!sock_state->delete_pending); if ((sock_state->poll_status == SOCK__POLL_PENDING) && - (sock_state->user_events & SOCK__KNOWN_EPOLL_EVENTS & + (sock_state->user_events & API_VALID_EPOLL_EVENTS & ~sock_state->pending_events) == 0) { /* All the events the user is interested in are already being monitored by * the pending poll operation. 
It might spuriously complete because of an
diff --git a/src/synthetic-event.c b/src/synthetic-event.c
new file mode 100644
index 0000000..82ccd58
--- /dev/null
+++ b/src/synthetic-event.c
@@ -0,0 +1,102 @@
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "api.h"
+#include "error.h"
+#include "nt.h"
+#include "port.h"
+#include "synthetic-event.h"
+#include "wepoll.h"
+#include "win.h"
+
+/*
+ * Completion packets that carry a synthetic event are distinguished from
+ * regular I/O completions by a special `NTSTATUS` value. This is a 32-bit
+ * integer, which is constructed as follows:
+ *
+ *      Bit(s)   Len  Value   Meaning
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * MSB  31-30      2  0 0     Is success (not: error/warning/informational)
+ *      29         1  1       Is application-defined (not: Windows-defined)
+ *      28         1  0       Reserved for `HRESULT`, must be 0
+ *      27         1  0       Reserved, must be 0
+ *      26-16     11  0x3bb   Facility number
+ * LSB  15-00     16  varies  Epoll events (copied from `struct epoll_event`)
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#define SYNTHETIC_EVENT__NTSTATUS 0x23bb0000UL
+
+/* Posts a completion packet that carries a synthetic epoll event to the I/O
+ * completion port obtained from `port_state`. `ev->events` must have
+ * `EPOLLONESHOT` set and may otherwise contain only bits from
+ * `API_VALID_EPOLL_EVENTS`; `ev->data` is copied into the packet verbatim.
+ * Returns 0 on success. On invalid flags or when `NtSetIoCompletion` fails,
+ * bails out through `return_set_error(-1, ...)`. */
+int synthetic_event_post_completion(port_state_t* port_state,
+                                    const struct epoll_event* ev) {
+  HANDLE iocp_handle;
+  NTSTATUS status;
+
+  /* These are the "fields" of the completion packet that will be posted to
+   * the I/O completion port. Windows will not interpret or modify them when
+   * `NtSetIoCompletion` is used. A fourth field, typically called
+   * `IoCompletionKey` or `KeyContext`, is deliberately not used; we might need
+   * it in the future to add support for sharing an I/O completion port with
+   * other libraries. */
+  uint32_t status_field;
+  uintptr_t data_fields[2] = {0, 0};
+
+  /* Validate the events and flags specified by `ev.events`:
+   *   - Only bits corresponding with actual epoll events may be set.
+   *   - It is explicitly allowed to not specify any events at all.
+   *   - The `EPOLLONESHOT` flag must be set.
+   *   - No other flags may be specified.
+   */
+  if ((ev->events & ~API_VALID_EPOLL_EVENTS) != EPOLLONESHOT)
+    return_set_error(-1, ERROR_INVALID_PARAMETER);
+  status_field =
+      SYNTHETIC_EVENT__NTSTATUS | (ev->events & API_VALID_EPOLL_EVENTS);
+
+  /* On 64-bit platforms, `epoll_data_t` is the size of a pointer, so only
+   * `data_fields[0]` is assigned a value here. On 32-bit platforms, both
+   * array elements are used. */
+  assert(sizeof ev->data <= sizeof data_fields);
+  memcpy(data_fields, &ev->data, sizeof ev->data);
+
+  iocp_handle = port_get_iocp_handle(port_state);
+
+  // TODO: check
+  // https://bugs.chromium.org/p/project-zero/issues/detail?id=1269.
+  status = NtSetIoCompletion(iocp_handle,
+                             NULL,
+                             (VOID*) data_fields[0],
+                             (NTSTATUS) status_field,
+                             (ULONG_PTR) data_fields[1]);
+  if (status != STATUS_SUCCESS)
+    return_set_error(-1, RtlNtStatusToDosError(status));
+
+  return 0;
+}
+
+/* Decodes a dequeued completion packet. When the status stored in
+ * `completion->Internal` matches the synthetic-event pattern, the events and
+ * user data are copied into `*ev` and 1 is returned. Otherwise 0 is returned
+ * and `*ev` is left untouched. */
+int synthetic_event_feed_completion(const OVERLAPPED_ENTRY* completion,
+                                    struct epoll_event* ev) {
+  uint32_t status_field;
+  uintptr_t data_fields[2];
+
+  status_field = (uint32_t) completion->Internal;
+  if ((status_field & ~API_VALID_EPOLL_EVENTS) != SYNTHETIC_EVENT__NTSTATUS)
+    return 0; /* This completion packet does not contain a synthetic event. */
+
+  ev->events = status_field & API_VALID_EPOLL_EVENTS;
+
+  data_fields[0] = (uintptr_t) completion->lpOverlapped;
+  data_fields[1] = completion->dwNumberOfBytesTransferred;
+  memcpy(&ev->data, data_fields, sizeof ev->data);
+
+  return 1;
+}
diff --git a/src/synthetic-event.h b/src/synthetic-event.h
new file mode 100644
index 0000000..b76e8ba
--- /dev/null
+++ b/src/synthetic-event.h
@@ -0,0 +1,15 @@
+#ifndef WEPOLL_SYNTHETIC_EVENT_H_
+#define WEPOLL_SYNTHETIC_EVENT_H_
+
+#include "config.h"
+#include "wepoll.h"
+#include "win.h"
+
+typedef struct port_state port_state_t;
+
+WEPOLL_INTERNAL int synthetic_event_post_completion(
+    port_state_t* port_state, const struct epoll_event* ev);
+WEPOLL_INTERNAL int synthetic_event_feed_completion(
+    const OVERLAPPED_ENTRY* completion, struct epoll_event* ev);
+
+#endif /* WEPOLL_SYNTHETIC_EVENT_H_ */