Mercurial > hg > nginx
diff src/event/quic/ngx_event_quic_bpf.c @ 8676:7df607cb2d11 quic
QUIC: ngx_quic_bpf module.
The QUIC kernel BPF helper inspects the packet payload for the DCID, extracts
the key, and routes the packet to the socket matching that key.
Due to the reuseport feature, each worker owns its own socket, which is
identified by the same key that is used to create the DCID.
BPF objects are locked in RAM and are subject to RLIMIT_MEMLOCK.
The "ulimit -l" command may be used to set up proper limits if map
creation fails with EPERM or a map update fails with ETOOLONG.
author | Vladimir Homutov <vl@nginx.com> |
---|---|
date | Fri, 25 Dec 2020 15:01:15 +0300 |
parents | |
children | d4e02b3b734f |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/event/quic/ngx_event_quic_bpf.c Fri Dec 25 15:01:15 2020 +0300 @@ -0,0 +1,649 @@ + +/* + * Copyright (C) Nginx, Inc. + */ + + +#include <ngx_config.h> +#include <ngx_core.h> + + +#define NGX_QUIC_BPF_VARNAME "NGINX_BPF_MAPS" +#define NGX_QUIC_BPF_VARSEP ';' +#define NGX_QUIC_BPF_ADDRSEP '#' + + +#define ngx_quic_bpf_get_conf(cycle) \ + (ngx_quic_bpf_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_quic_bpf_module) + +#define ngx_quic_bpf_get_old_conf(cycle) \ + cycle->old_cycle->conf_ctx ? ngx_quic_bpf_get_conf(cycle->old_cycle) \ + : NULL + +#define ngx_core_get_conf(cycle) \ + (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_core_module) + + +typedef struct { + ngx_queue_t queue; + int map_fd; + + struct sockaddr *sockaddr; + socklen_t socklen; + ngx_uint_t unused; /* unsigned unused:1; */ +} ngx_quic_sock_group_t; + + +typedef struct { + ngx_flag_t enabled; + ngx_uint_t map_size; + ngx_queue_t groups; /* of ngx_quic_sock_group_t */ +} ngx_quic_bpf_conf_t; + + +static void *ngx_quic_bpf_create_conf(ngx_cycle_t *cycle); +static ngx_int_t ngx_quic_bpf_module_init(ngx_cycle_t *cycle); + +static void ngx_quic_bpf_cleanup(void *data); +static ngx_inline void ngx_quic_bpf_close(ngx_log_t *log, int fd, + const char *name); + +static ngx_quic_sock_group_t *ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf, + ngx_listening_t *ls); +static ngx_quic_sock_group_t *ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle, + struct sockaddr *sa, socklen_t socklen); +static ngx_quic_sock_group_t *ngx_quic_bpf_create_group(ngx_cycle_t *cycle, + ngx_listening_t *ls); +static ngx_quic_sock_group_t *ngx_quic_bpf_get_group(ngx_cycle_t *cycle, + ngx_listening_t *ls); +static ngx_int_t ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle, + ngx_listening_t *ls); +static uint64_t ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log); + +static ngx_int_t ngx_quic_bpf_export_maps(ngx_cycle_t *cycle); +static ngx_int_t 
ngx_quic_bpf_import_maps(ngx_cycle_t *cycle); + +extern ngx_bpf_program_t ngx_quic_reuseport_helper; + + +static ngx_command_t ngx_quic_bpf_commands[] = { + + { ngx_string("quic_bpf"), + NGX_MAIN_CONF|NGX_DIRECT_CONF|NGX_CONF_FLAG, + ngx_conf_set_flag_slot, + 0, + offsetof(ngx_quic_bpf_conf_t, enabled), + NULL }, + + ngx_null_command +}; + + +static ngx_core_module_t ngx_quic_bpf_module_ctx = { + ngx_string("quic_bpf"), + ngx_quic_bpf_create_conf, + NULL +}; + + +ngx_module_t ngx_quic_bpf_module = { + NGX_MODULE_V1, + &ngx_quic_bpf_module_ctx, /* module context */ + ngx_quic_bpf_commands, /* module directives */ + NGX_CORE_MODULE, /* module type */ + NULL, /* init master */ + ngx_quic_bpf_module_init, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + NGX_MODULE_V1_PADDING +}; + + +static void * +ngx_quic_bpf_create_conf(ngx_cycle_t *cycle) +{ + ngx_quic_bpf_conf_t *bcf; + + bcf = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_bpf_conf_t)); + if (bcf == NULL) { + return NULL; + } + + bcf->enabled = NGX_CONF_UNSET; + bcf->map_size = NGX_CONF_UNSET_UINT; + + ngx_queue_init(&bcf->groups); + + return bcf; +} + + +static ngx_int_t +ngx_quic_bpf_module_init(ngx_cycle_t *cycle) +{ + ngx_uint_t i; + ngx_listening_t *ls; + ngx_core_conf_t *ccf; + ngx_pool_cleanup_t *cln; + ngx_quic_bpf_conf_t *bcf; + + ccf = ngx_core_get_conf(cycle); + bcf = ngx_quic_bpf_get_conf(cycle); + + ngx_conf_init_value(bcf->enabled, 0); + + bcf->map_size = ccf->worker_processes * 4; + + cln = ngx_pool_cleanup_add(cycle->pool, 0); + if (cln == NULL) { + goto failed; + } + + cln->data = bcf; + cln->handler = ngx_quic_bpf_cleanup; + + if (ngx_inherited && ngx_is_init_cycle(cycle->old_cycle)) { + if (ngx_quic_bpf_import_maps(cycle) != NGX_OK) { + goto failed; + } + } + + ls = cycle->listening.elts; + + for (i = 0; i < cycle->listening.nelts; i++) { + if (ls[i].quic && ls[i].reuseport) { + if 
(ngx_quic_bpf_group_add_socket(cycle, &ls[i]) != NGX_OK) { + goto failed; + } + } + } + + if (ngx_quic_bpf_export_maps(cycle) != NGX_OK) { + goto failed; + } + + return NGX_OK; + +failed: + + if (ngx_is_init_cycle(cycle->old_cycle)) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "ngx_quic_bpf_module failed to initialize, check limits"); + + /* refuse to start */ + return NGX_ERROR; + } + + /* + * returning error now will lead to master process exiting immediately + * leaving worker processes orphaned, what is really unexpected. + * Instead, just issue a not about failed initialization and try + * to cleanup a bit. Still program can be already loaded to kernel + * for some reuseport groups, and there is no way to revert, so + * behaviour may be inconsistent. + */ + + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "ngx_quic_bpf_module failed to initialize properly, ignored." + "please check limits and note that nginx state now " + "can be inconsistent and restart may be required"); + + return NGX_OK; +} + + +static void +ngx_quic_bpf_cleanup(void *data) +{ + ngx_quic_bpf_conf_t *bcf = (ngx_quic_bpf_conf_t *) data; + + ngx_queue_t *q; + ngx_quic_sock_group_t *grp; + + for (q = ngx_queue_head(&bcf->groups); + q != ngx_queue_sentinel(&bcf->groups); + q = ngx_queue_next(q)) + { + grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue); + + ngx_quic_bpf_close(ngx_cycle->log, grp->map_fd, "map"); + } +} + + +static ngx_inline void +ngx_quic_bpf_close(ngx_log_t *log, int fd, const char *name) +{ + if (close(fd) != -1) { + return; + } + + ngx_log_error(NGX_LOG_EMERG, log, ngx_errno, + "quic bpf close %s fd:%i failed", name, fd); +} + + +static ngx_quic_sock_group_t * +ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf, ngx_listening_t *ls) +{ + ngx_queue_t *q; + ngx_quic_sock_group_t *grp; + + for (q = ngx_queue_head(&bcf->groups); + q != ngx_queue_sentinel(&bcf->groups); + q = ngx_queue_next(q)) + { + grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue); + + if 
(ngx_cmp_sockaddr(ls->sockaddr, ls->socklen, + grp->sockaddr, grp->socklen, 1) + == NGX_OK) + { + return grp; + } + } + + return NULL; +} + + +static ngx_quic_sock_group_t * +ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle, struct sockaddr *sa, + socklen_t socklen) +{ + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + bcf = ngx_quic_bpf_get_conf(cycle); + + grp = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_sock_group_t)); + if (grp == NULL) { + return NULL; + } + + grp->socklen = socklen; + grp->sockaddr = ngx_palloc(cycle->pool, socklen); + if (grp->sockaddr == NULL) { + return NULL; + } + ngx_memcpy(grp->sockaddr, sa, socklen); + + ngx_queue_insert_tail(&bcf->groups, &grp->queue); + + return grp; +} + + +static ngx_quic_sock_group_t * +ngx_quic_bpf_create_group(ngx_cycle_t *cycle, ngx_listening_t *ls) +{ + int progfd, failed, flags, rc; + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + bcf = ngx_quic_bpf_get_conf(cycle); + + if (!bcf->enabled) { + return NULL; + } + + grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen); + if (grp == NULL) { + return NULL; + } + + grp->map_fd = ngx_bpf_map_create(cycle->log, BPF_MAP_TYPE_SOCKHASH, + sizeof(uint64_t), sizeof(uint64_t), + bcf->map_size, 0); + if (grp->map_fd == -1) { + goto failed; + } + + flags = fcntl(grp->map_fd, F_GETFD); + if (flags == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, errno, + "quic bpf getfd failed"); + goto failed; + } + + /* need to inherit map during binary upgrade after exec */ + flags &= ~FD_CLOEXEC; + + rc = fcntl(grp->map_fd, F_SETFD, flags); + if (rc == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, errno, + "quic bpf setfd failed"); + goto failed; + } + + ngx_bpf_program_link(&ngx_quic_reuseport_helper, + "ngx_quic_sockmap", grp->map_fd); + + progfd = ngx_bpf_load_program(cycle->log, &ngx_quic_reuseport_helper); + if (progfd < 0) { + goto failed; + } + + failed = 0; + + if (setsockopt(ls->fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, + &progfd, sizeof(int)) + 
== -1) + { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_socket_errno, + "quic bpf setsockopt(SO_ATTACH_REUSEPORT_EBPF) failed"); + failed = 1; + } + + ngx_quic_bpf_close(cycle->log, progfd, "program"); + + if (failed) { + goto failed; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, + "quic bpf sockmap created fd:%i", grp->map_fd); + return grp; + +failed: + + if (grp->map_fd != -1) { + ngx_quic_bpf_close(cycle->log, grp->map_fd, "map"); + } + + ngx_queue_remove(&grp->queue); + + return NULL; +} + + +static ngx_quic_sock_group_t * +ngx_quic_bpf_get_group(ngx_cycle_t *cycle, ngx_listening_t *ls) +{ + ngx_quic_bpf_conf_t *bcf, *old_bcf; + ngx_quic_sock_group_t *grp, *ogrp; + + bcf = ngx_quic_bpf_get_conf(cycle); + + grp = ngx_quic_bpf_find_group(bcf, ls); + if (grp) { + return grp; + } + + old_bcf = ngx_quic_bpf_get_old_conf(cycle); + + if (old_bcf == NULL) { + return ngx_quic_bpf_create_group(cycle, ls); + } + + ogrp = ngx_quic_bpf_find_group(old_bcf, ls); + if (ogrp == NULL) { + return ngx_quic_bpf_create_group(cycle, ls); + } + + grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen); + if (grp == NULL) { + return NULL; + } + + grp->map_fd = dup(ogrp->map_fd); + if (grp->map_fd == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "quic bpf failed to duplicate bpf map descriptor"); + + ngx_queue_remove(&grp->queue); + + return NULL; + } + + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0, + "quic bpf sockmap fd duplicated old:%i new:%i", + ogrp->map_fd, grp->map_fd); + + return grp; +} + + +static ngx_int_t +ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle, ngx_listening_t *ls) +{ + uint64_t cookie; + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + bcf = ngx_quic_bpf_get_conf(cycle); + + grp = ngx_quic_bpf_get_group(cycle, ls); + + if (grp == NULL) { + if (!bcf->enabled) { + return NGX_OK; + } + + return NGX_ERROR; + } + + grp->unused = 0; + + cookie = ngx_quic_bpf_socket_key(ls->fd, cycle->log); + if (cookie == 
(uint64_t) NGX_ERROR) { + return NGX_ERROR; + } + + /* map[cookie] = socket; for use in kernel helper */ + if (ngx_bpf_map_update(grp->map_fd, &cookie, &ls->fd, BPF_ANY) == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "quic bpf failed to update socket map key=%xL", cookie); + return NGX_ERROR; + } + + ngx_log_debug4(NGX_LOG_DEBUG_EVENT, cycle->log, 0, + "quic bpf sockmap fd:%d add socket:%d cookie:0x%xL worker:%d", + grp->map_fd, ls->fd, cookie, ls->worker); + + /* do not inherit this socket */ + ls->ignore = 1; + + return NGX_OK; +} + + +static uint64_t +ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log) +{ + uint64_t cookie; + socklen_t optlen; + + optlen = sizeof(cookie); + + if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &optlen) == -1) { + ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno, + "quic bpf getsockopt(SO_COOKIE) failed"); + + return (ngx_uint_t) NGX_ERROR; + } + + return cookie; +} + + +static ngx_int_t +ngx_quic_bpf_export_maps(ngx_cycle_t *cycle) +{ + u_char *p, *buf; + size_t len; + ngx_str_t *var; + ngx_queue_t *q; + ngx_core_conf_t *ccf; + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + ccf = ngx_core_get_conf(cycle); + bcf = ngx_quic_bpf_get_conf(cycle); + + len = sizeof(NGX_QUIC_BPF_VARNAME) + 1; + + q = ngx_queue_head(&bcf->groups); + + while (q != ngx_queue_sentinel(&bcf->groups)) { + + grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue); + + q = ngx_queue_next(q); + + if (grp->unused) { + /* + * map was inherited, but it is not used in this configuration; + * do not pass such map further and drop the group to prevent + * interference with changes during reload + */ + + ngx_quic_bpf_close(cycle->log, grp->map_fd, "map"); + ngx_queue_remove(&grp->queue); + + continue; + } + + len += NGX_INT32_LEN + 1 + NGX_SOCKADDR_STRLEN + 1; + } + + len++; + + buf = ngx_palloc(cycle->pool, len); + if (buf == NULL) { + return NGX_ERROR; + } + + p = ngx_cpymem(buf, NGX_QUIC_BPF_VARNAME "=", + 
sizeof(NGX_QUIC_BPF_VARNAME)); + + for (q = ngx_queue_head(&bcf->groups); + q != ngx_queue_sentinel(&bcf->groups); + q = ngx_queue_next(q)) + { + grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue); + + p = ngx_sprintf(p, "%ud", grp->map_fd); + + *p++ = NGX_QUIC_BPF_ADDRSEP; + + p += ngx_sock_ntop(grp->sockaddr, grp->socklen, p, + NGX_SOCKADDR_STRLEN, 1); + + *p++ = NGX_QUIC_BPF_VARSEP; + } + + *p = '\0'; + + var = ngx_array_push(&ccf->env); + if (var == NULL) { + return NGX_ERROR; + } + + var->data = buf; + var->len = sizeof(NGX_QUIC_BPF_VARNAME) - 1; + + return NGX_OK; +} + + +static ngx_int_t +ngx_quic_bpf_import_maps(ngx_cycle_t *cycle) +{ + int s; + u_char *inherited, *p, *v; + ngx_uint_t in_fd; + ngx_addr_t tmp; + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + inherited = (u_char *) getenv(NGX_QUIC_BPF_VARNAME); + + if (inherited == NULL) { + return NGX_OK; + } + + bcf = ngx_quic_bpf_get_conf(cycle); + +#if (NGX_SUPPRESS_WARN) + s = -1; +#endif + + in_fd = 1; + + for (p = inherited, v = p; *p; p++) { + + switch (*p) { + + case NGX_QUIC_BPF_ADDRSEP: + + if (!in_fd) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "quic bpf failed to parse inherited env"); + return NGX_ERROR; + } + in_fd = 0; + + s = ngx_atoi(v, p - v); + if (s == NGX_ERROR) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "quic bpf failed to parse inherited map fd"); + return NGX_ERROR; + } + + v = p + 1; + break; + + case NGX_QUIC_BPF_VARSEP: + + if (in_fd) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "quic bpf failed to parse inherited env"); + return NGX_ERROR; + } + in_fd = 1; + + grp = ngx_pcalloc(cycle->pool, + sizeof(ngx_quic_sock_group_t)); + if (grp == NULL) { + return NGX_ERROR; + } + + grp->map_fd = s; + + if (ngx_parse_addr_port(cycle->pool, &tmp, v, p - v) + != NGX_OK) + { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "quic bpf failed to parse inherited" + " address '%*s'", p - v , v); + + ngx_quic_bpf_close(cycle->log, s, "inherited map"); + + return 
NGX_ERROR; + } + + grp->sockaddr = tmp.sockaddr; + grp->socklen = tmp.socklen; + + grp->unused = 1; + + ngx_queue_insert_tail(&bcf->groups, &grp->queue); + + ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0, + "quic bpf sockmap inherited with " + "fd:%i address:%*s", + grp->map_fd, p - v, v); + v = p + 1; + break; + + default: + break; + } + } + + return NGX_OK; +}