Mercurial > hg > nginx
annotate src/stream/ngx_stream.c @ 6153:4f6efabcb09b
The "reuseport" option of the "listen" directive.
When configured, an individual listen socket on a given address is
created for each worker process. This allows reducing in-kernel lock
contention on configurations with high accept rates, resulting in better
performance. As of now it works on Linux and DragonFly BSD.
Note that on Linux incoming connection requests are currently tied
to a specific listen socket, and if some sockets are closed, connection
requests will be reset; see https://lwn.net/Articles/542629/. With
nginx, this may happen if the number of worker processes is reduced.
There is no such problem on DragonFly BSD.
Based on previous work by Sepherosa Ziehau and Yingqi Lu.
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Wed, 20 May 2015 15:51:56 +0300 |
parents | 3c344ea7d88b |
children | f654addf0eea |
rev | line source |
---|---|
6115 | 1 |
2 /* | |
3 * Copyright (C) Roman Arutyunyan | |
4 * Copyright (C) Nginx, Inc. | |
5 */ | |
6 | |
7 | |
8 #include <ngx_config.h> | |
9 #include <ngx_core.h> | |
10 #include <ngx_event.h> | |
11 #include <ngx_stream.h> | |
12 | |
13 | |
14 static char *ngx_stream_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf); | |
15 static ngx_int_t ngx_stream_add_ports(ngx_conf_t *cf, ngx_array_t *ports, | |
16 ngx_stream_listen_t *listen); | |
17 static char *ngx_stream_optimize_servers(ngx_conf_t *cf, ngx_array_t *ports); | |
18 static ngx_int_t ngx_stream_add_addrs(ngx_conf_t *cf, ngx_stream_port_t *stport, | |
19 ngx_stream_conf_addr_t *addr); | |
20 #if (NGX_HAVE_INET6) | |
21 static ngx_int_t ngx_stream_add_addrs6(ngx_conf_t *cf, | |
22 ngx_stream_port_t *stport, ngx_stream_conf_addr_t *addr); | |
23 #endif | |
24 static ngx_int_t ngx_stream_cmp_conf_addrs(const void *one, const void *two); | |
25 | |
26 | |
27 ngx_uint_t ngx_stream_max_module; | |
28 | |
29 | |
30 static ngx_command_t ngx_stream_commands[] = { | |
31 | |
32 { ngx_string("stream"), | |
33 NGX_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_NOARGS, | |
34 ngx_stream_block, | |
35 0, | |
36 0, | |
37 NULL }, | |
38 | |
39 ngx_null_command | |
40 }; | |
41 | |
42 | |
43 static ngx_core_module_t ngx_stream_module_ctx = { | |
44 ngx_string("stream"), | |
45 NULL, | |
46 NULL | |
47 }; | |
48 | |
49 | |
50 ngx_module_t ngx_stream_module = { | |
51 NGX_MODULE_V1, | |
52 &ngx_stream_module_ctx, /* module context */ | |
53 ngx_stream_commands, /* module directives */ | |
54 NGX_CORE_MODULE, /* module type */ | |
55 NULL, /* init master */ | |
56 NULL, /* init module */ | |
57 NULL, /* init process */ | |
58 NULL, /* init thread */ | |
59 NULL, /* exit thread */ | |
60 NULL, /* exit process */ | |
61 NULL, /* exit master */ | |
62 NGX_MODULE_V1_PADDING | |
63 }; | |
64 | |
65 | |
66 static char * | |
67 ngx_stream_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) | |
68 { | |
69 char *rv; | |
70 ngx_uint_t i, m, mi, s; | |
71 ngx_conf_t pcf; | |
72 ngx_array_t ports; | |
73 ngx_stream_listen_t *listen; | |
74 ngx_stream_module_t *module; | |
75 ngx_stream_conf_ctx_t *ctx; | |
76 ngx_stream_core_srv_conf_t **cscfp; | |
77 ngx_stream_core_main_conf_t *cmcf; | |
78 | |
79 /* the main stream context */ | |
80 | |
81 ctx = ngx_pcalloc(cf->pool, sizeof(ngx_stream_conf_ctx_t)); | |
82 if (ctx == NULL) { | |
83 return NGX_CONF_ERROR; | |
84 } | |
85 | |
86 *(ngx_stream_conf_ctx_t **) conf = ctx; | |
87 | |
88 /* count the number of the stream modules and set up their indices */ | |
89 | |
90 ngx_stream_max_module = 0; | |
91 for (m = 0; ngx_modules[m]; m++) { | |
92 if (ngx_modules[m]->type != NGX_STREAM_MODULE) { | |
93 continue; | |
94 } | |
95 | |
96 ngx_modules[m]->ctx_index = ngx_stream_max_module++; | |
97 } | |
98 | |
99 | |
100 /* the stream main_conf context, it's the same in the all stream contexts */ | |
101 | |
102 ctx->main_conf = ngx_pcalloc(cf->pool, | |
103 sizeof(void *) * ngx_stream_max_module); | |
104 if (ctx->main_conf == NULL) { | |
105 return NGX_CONF_ERROR; | |
106 } | |
107 | |
108 | |
109 /* | |
110 * the stream null srv_conf context, it is used to merge | |
111 * the server{}s' srv_conf's | |
112 */ | |
113 | |
114 ctx->srv_conf = ngx_pcalloc(cf->pool, | |
115 sizeof(void *) * ngx_stream_max_module); | |
116 if (ctx->srv_conf == NULL) { | |
117 return NGX_CONF_ERROR; | |
118 } | |
119 | |
120 | |
121 /* | |
122 * create the main_conf's and the null srv_conf's of the all stream modules | |
123 */ | |
124 | |
125 for (m = 0; ngx_modules[m]; m++) { | |
126 if (ngx_modules[m]->type != NGX_STREAM_MODULE) { | |
127 continue; | |
128 } | |
129 | |
130 module = ngx_modules[m]->ctx; | |
131 mi = ngx_modules[m]->ctx_index; | |
132 | |
133 if (module->create_main_conf) { | |
134 ctx->main_conf[mi] = module->create_main_conf(cf); | |
135 if (ctx->main_conf[mi] == NULL) { | |
136 return NGX_CONF_ERROR; | |
137 } | |
138 } | |
139 | |
140 if (module->create_srv_conf) { | |
141 ctx->srv_conf[mi] = module->create_srv_conf(cf); | |
142 if (ctx->srv_conf[mi] == NULL) { | |
143 return NGX_CONF_ERROR; | |
144 } | |
145 } | |
146 } | |
147 | |
148 | |
149 /* parse inside the stream{} block */ | |
150 | |
151 pcf = *cf; | |
152 cf->ctx = ctx; | |
153 | |
154 cf->module_type = NGX_STREAM_MODULE; | |
155 cf->cmd_type = NGX_STREAM_MAIN_CONF; | |
156 rv = ngx_conf_parse(cf, NULL); | |
157 | |
158 if (rv != NGX_CONF_OK) { | |
159 *cf = pcf; | |
160 return rv; | |
161 } | |
162 | |
163 | |
164 /* init stream{} main_conf's, merge the server{}s' srv_conf's */ | |
165 | |
166 cmcf = ctx->main_conf[ngx_stream_core_module.ctx_index]; | |
167 cscfp = cmcf->servers.elts; | |
168 | |
169 for (m = 0; ngx_modules[m]; m++) { | |
170 if (ngx_modules[m]->type != NGX_STREAM_MODULE) { | |
171 continue; | |
172 } | |
173 | |
174 module = ngx_modules[m]->ctx; | |
175 mi = ngx_modules[m]->ctx_index; | |
176 | |
177 /* init stream{} main_conf's */ | |
178 | |
179 cf->ctx = ctx; | |
180 | |
181 if (module->init_main_conf) { | |
182 rv = module->init_main_conf(cf, ctx->main_conf[mi]); | |
183 if (rv != NGX_CONF_OK) { | |
184 *cf = pcf; | |
185 return rv; | |
186 } | |
187 } | |
188 | |
189 for (s = 0; s < cmcf->servers.nelts; s++) { | |
190 | |
191 /* merge the server{}s' srv_conf's */ | |
192 | |
193 cf->ctx = cscfp[s]->ctx; | |
194 | |
195 if (module->merge_srv_conf) { | |
196 rv = module->merge_srv_conf(cf, | |
197 ctx->srv_conf[mi], | |
198 cscfp[s]->ctx->srv_conf[mi]); | |
199 if (rv != NGX_CONF_OK) { | |
200 *cf = pcf; | |
201 return rv; | |
202 } | |
203 } | |
204 } | |
205 } | |
206 | |
207 *cf = pcf; | |
208 | |
209 | |
210 if (ngx_array_init(&ports, cf->temp_pool, 4, sizeof(ngx_stream_conf_port_t)) | |
211 != NGX_OK) | |
212 { | |
213 return NGX_CONF_ERROR; | |
214 } | |
215 | |
216 listen = cmcf->listen.elts; | |
217 | |
218 for (i = 0; i < cmcf->listen.nelts; i++) { | |
219 if (ngx_stream_add_ports(cf, &ports, &listen[i]) != NGX_OK) { | |
220 return NGX_CONF_ERROR; | |
221 } | |
222 } | |
223 | |
224 return ngx_stream_optimize_servers(cf, &ports); | |
225 } | |
226 | |
227 | |
228 static ngx_int_t | |
229 ngx_stream_add_ports(ngx_conf_t *cf, ngx_array_t *ports, | |
230 ngx_stream_listen_t *listen) | |
231 { | |
232 in_port_t p; | |
233 ngx_uint_t i; | |
234 struct sockaddr *sa; | |
235 struct sockaddr_in *sin; | |
236 ngx_stream_conf_port_t *port; | |
237 ngx_stream_conf_addr_t *addr; | |
238 #if (NGX_HAVE_INET6) | |
239 struct sockaddr_in6 *sin6; | |
240 #endif | |
241 | |
242 sa = (struct sockaddr *) &listen->sockaddr; | |
243 | |
244 switch (sa->sa_family) { | |
245 | |
246 #if (NGX_HAVE_INET6) | |
247 case AF_INET6: | |
248 sin6 = (struct sockaddr_in6 *) sa; | |
249 p = sin6->sin6_port; | |
250 break; | |
251 #endif | |
252 | |
253 #if (NGX_HAVE_UNIX_DOMAIN) | |
254 case AF_UNIX: | |
255 p = 0; | |
256 break; | |
257 #endif | |
258 | |
259 default: /* AF_INET */ | |
260 sin = (struct sockaddr_in *) sa; | |
261 p = sin->sin_port; | |
262 break; | |
263 } | |
264 | |
265 port = ports->elts; | |
266 for (i = 0; i < ports->nelts; i++) { | |
267 if (p == port[i].port && sa->sa_family == port[i].family) { | |
268 | |
269 /* a port is already in the port list */ | |
270 | |
271 port = &port[i]; | |
272 goto found; | |
273 } | |
274 } | |
275 | |
276 /* add a port to the port list */ | |
277 | |
278 port = ngx_array_push(ports); | |
279 if (port == NULL) { | |
280 return NGX_ERROR; | |
281 } | |
282 | |
283 port->family = sa->sa_family; | |
284 port->port = p; | |
285 | |
286 if (ngx_array_init(&port->addrs, cf->temp_pool, 2, | |
287 sizeof(ngx_stream_conf_addr_t)) | |
288 != NGX_OK) | |
289 { | |
290 return NGX_ERROR; | |
291 } | |
292 | |
293 found: | |
294 | |
295 addr = ngx_array_push(&port->addrs); | |
296 if (addr == NULL) { | |
297 return NGX_ERROR; | |
298 } | |
299 | |
300 addr->sockaddr = (struct sockaddr *) &listen->sockaddr; | |
301 addr->socklen = listen->socklen; | |
302 addr->ctx = listen->ctx; | |
303 addr->bind = listen->bind; | |
304 addr->wildcard = listen->wildcard; | |
305 addr->so_keepalive = listen->so_keepalive; | |
306 #if (NGX_HAVE_KEEPALIVE_TUNABLE) | |
307 addr->tcp_keepidle = listen->tcp_keepidle; | |
308 addr->tcp_keepintvl = listen->tcp_keepintvl; | |
309 addr->tcp_keepcnt = listen->tcp_keepcnt; | |
310 #endif | |
311 #if (NGX_STREAM_SSL) | |
312 addr->ssl = listen->ssl; | |
313 #endif | |
314 #if (NGX_HAVE_INET6 && defined IPV6_V6ONLY) | |
315 addr->ipv6only = listen->ipv6only; | |
316 #endif | |
317 | |
318 return NGX_OK; | |
319 } | |
320 | |
321 | |
322 static char * | |
323 ngx_stream_optimize_servers(ngx_conf_t *cf, ngx_array_t *ports) | |
324 { | |
325 ngx_uint_t i, p, last, bind_wildcard; | |
326 ngx_listening_t *ls; | |
327 ngx_stream_port_t *stport; | |
328 ngx_stream_conf_port_t *port; | |
329 ngx_stream_conf_addr_t *addr; | |
330 ngx_stream_core_srv_conf_t *cscf; | |
331 | |
332 port = ports->elts; | |
333 for (p = 0; p < ports->nelts; p++) { | |
334 | |
335 ngx_sort(port[p].addrs.elts, (size_t) port[p].addrs.nelts, | |
336 sizeof(ngx_stream_conf_addr_t), ngx_stream_cmp_conf_addrs); | |
337 | |
338 addr = port[p].addrs.elts; | |
339 last = port[p].addrs.nelts; | |
340 | |
341 /* | |
342 * if there is the binding to the "*:port" then we need to bind() | |
343 * to the "*:port" only and ignore the other bindings | |
344 */ | |
345 | |
346 if (addr[last - 1].wildcard) { | |
347 addr[last - 1].bind = 1; | |
348 bind_wildcard = 1; | |
349 | |
350 } else { | |
351 bind_wildcard = 0; | |
352 } | |
353 | |
354 i = 0; | |
355 | |
356 while (i < last) { | |
357 | |
358 if (bind_wildcard && !addr[i].bind) { | |
359 i++; | |
360 continue; | |
361 } | |
362 | |
363 ls = ngx_create_listening(cf, addr[i].sockaddr, addr[i].socklen); | |
364 if (ls == NULL) { | |
365 return NGX_CONF_ERROR; | |
366 } | |
367 | |
368 ls->addr_ntop = 1; | |
369 ls->handler = ngx_stream_init_connection; | |
370 ls->pool_size = 256; | |
371 | |
372 cscf = addr->ctx->srv_conf[ngx_stream_core_module.ctx_index]; | |
6129
187aa751ad62
Core: the ngx_set_connection_log() macro.
Vladimir Homutov <vl@nginx.com>
parents:
6115
diff
changeset
|
373 |
6115 | 374 ls->logp = cscf->error_log; |
375 ls->log.data = &ls->addr_text; | |
376 ls->log.handler = ngx_accept_log_error; | |
377 | |
378 ls->keepalive = addr[i].so_keepalive; | |
379 #if (NGX_HAVE_KEEPALIVE_TUNABLE) | |
380 ls->keepidle = addr[i].tcp_keepidle; | |
381 ls->keepintvl = addr[i].tcp_keepintvl; | |
382 ls->keepcnt = addr[i].tcp_keepcnt; | |
383 #endif | |
384 | |
385 #if (NGX_HAVE_INET6 && defined IPV6_V6ONLY) | |
386 ls->ipv6only = addr[i].ipv6only; | |
387 #endif | |
388 | |
389 stport = ngx_palloc(cf->pool, sizeof(ngx_stream_port_t)); | |
390 if (stport == NULL) { | |
391 return NGX_CONF_ERROR; | |
392 } | |
393 | |
394 ls->servers = stport; | |
395 | |
6152
3c344ea7d88b
Simplified ngx_http_init_listening().
Maxim Dounin <mdounin@mdounin.ru>
parents:
6129
diff
changeset
|
396 stport->naddrs = i + 1; |
6115 | 397 |
398 switch (ls->sockaddr->sa_family) { | |
399 #if (NGX_HAVE_INET6) | |
400 case AF_INET6: | |
401 if (ngx_stream_add_addrs6(cf, stport, addr) != NGX_OK) { | |
402 return NGX_CONF_ERROR; | |
403 } | |
404 break; | |
405 #endif | |
406 default: /* AF_INET */ | |
407 if (ngx_stream_add_addrs(cf, stport, addr) != NGX_OK) { | |
408 return NGX_CONF_ERROR; | |
409 } | |
410 break; | |
411 } | |
412 | |
6153
4f6efabcb09b
The "reuseport" option of the "listen" directive.
Maxim Dounin <mdounin@mdounin.ru>
parents:
6152
diff
changeset
|
413 if (ngx_clone_listening(cf, ls) != NGX_OK) { |
4f6efabcb09b
The "reuseport" option of the "listen" directive.
Maxim Dounin <mdounin@mdounin.ru>
parents:
6152
diff
changeset
|
414 return NGX_CONF_ERROR; |
4f6efabcb09b
The "reuseport" option of the "listen" directive.
Maxim Dounin <mdounin@mdounin.ru>
parents:
6152
diff
changeset
|
415 } |
4f6efabcb09b
The "reuseport" option of the "listen" directive.
Maxim Dounin <mdounin@mdounin.ru>
parents:
6152
diff
changeset
|
416 |
6115 | 417 addr++; |
418 last--; | |
419 } | |
420 } | |
421 | |
422 return NGX_CONF_OK; | |
423 } | |
424 | |
425 | |
426 static ngx_int_t | |
427 ngx_stream_add_addrs(ngx_conf_t *cf, ngx_stream_port_t *stport, | |
428 ngx_stream_conf_addr_t *addr) | |
429 { | |
430 u_char *p; | |
431 size_t len; | |
432 ngx_uint_t i; | |
433 struct sockaddr_in *sin; | |
434 ngx_stream_in_addr_t *addrs; | |
435 u_char buf[NGX_SOCKADDR_STRLEN]; | |
436 | |
437 stport->addrs = ngx_pcalloc(cf->pool, | |
438 stport->naddrs * sizeof(ngx_stream_in_addr_t)); | |
439 if (stport->addrs == NULL) { | |
440 return NGX_ERROR; | |
441 } | |
442 | |
443 addrs = stport->addrs; | |
444 | |
445 for (i = 0; i < stport->naddrs; i++) { | |
446 | |
447 sin = (struct sockaddr_in *) addr[i].sockaddr; | |
448 addrs[i].addr = sin->sin_addr.s_addr; | |
449 | |
450 addrs[i].conf.ctx = addr[i].ctx; | |
451 #if (NGX_STREAM_SSL) | |
452 addrs[i].conf.ssl = addr[i].ssl; | |
453 #endif | |
454 | |
455 len = ngx_sock_ntop(addr[i].sockaddr, addr[i].socklen, buf, | |
456 NGX_SOCKADDR_STRLEN, 1); | |
457 | |
458 p = ngx_pnalloc(cf->pool, len); | |
459 if (p == NULL) { | |
460 return NGX_ERROR; | |
461 } | |
462 | |
463 ngx_memcpy(p, buf, len); | |
464 | |
465 addrs[i].conf.addr_text.len = len; | |
466 addrs[i].conf.addr_text.data = p; | |
467 } | |
468 | |
469 return NGX_OK; | |
470 } | |
471 | |
472 | |
473 #if (NGX_HAVE_INET6) | |
474 | |
475 static ngx_int_t | |
476 ngx_stream_add_addrs6(ngx_conf_t *cf, ngx_stream_port_t *stport, | |
477 ngx_stream_conf_addr_t *addr) | |
478 { | |
479 u_char *p; | |
480 size_t len; | |
481 ngx_uint_t i; | |
482 struct sockaddr_in6 *sin6; | |
483 ngx_stream_in6_addr_t *addrs6; | |
484 u_char buf[NGX_SOCKADDR_STRLEN]; | |
485 | |
486 stport->addrs = ngx_pcalloc(cf->pool, | |
487 stport->naddrs * sizeof(ngx_stream_in6_addr_t)); | |
488 if (stport->addrs == NULL) { | |
489 return NGX_ERROR; | |
490 } | |
491 | |
492 addrs6 = stport->addrs; | |
493 | |
494 for (i = 0; i < stport->naddrs; i++) { | |
495 | |
496 sin6 = (struct sockaddr_in6 *) addr[i].sockaddr; | |
497 addrs6[i].addr6 = sin6->sin6_addr; | |
498 | |
499 addrs6[i].conf.ctx = addr[i].ctx; | |
500 #if (NGX_STREAM_SSL) | |
501 addrs6[i].conf.ssl = addr[i].ssl; | |
502 #endif | |
503 | |
504 len = ngx_sock_ntop(addr[i].sockaddr, addr[i].socklen, buf, | |
505 NGX_SOCKADDR_STRLEN, 1); | |
506 | |
507 p = ngx_pnalloc(cf->pool, len); | |
508 if (p == NULL) { | |
509 return NGX_ERROR; | |
510 } | |
511 | |
512 ngx_memcpy(p, buf, len); | |
513 | |
514 addrs6[i].conf.addr_text.len = len; | |
515 addrs6[i].conf.addr_text.data = p; | |
516 } | |
517 | |
518 return NGX_OK; | |
519 } | |
520 | |
521 #endif | |
522 | |
523 | |
524 static ngx_int_t | |
525 ngx_stream_cmp_conf_addrs(const void *one, const void *two) | |
526 { | |
527 ngx_stream_conf_addr_t *first, *second; | |
528 | |
529 first = (ngx_stream_conf_addr_t *) one; | |
530 second = (ngx_stream_conf_addr_t *) two; | |
531 | |
532 if (first->wildcard) { | |
533 /* a wildcard must be the last resort, shift it to the end */ | |
534 return 1; | |
535 } | |
536 | |
537 if (second->wildcard) { | |
538 /* a wildcard must be the last resort, shift it to the end */ | |
539 return -1; | |
540 } | |
541 | |
542 if (first->bind && !second->bind) { | |
543 /* shift explicit bind()ed addresses to the start */ | |
544 return -1; | |
545 } | |
546 | |
547 if (!first->bind && second->bind) { | |
548 /* shift explicit bind()ed addresses to the start */ | |
549 return 1; | |
550 } | |
551 | |
552 /* do not sort by default */ | |
553 | |
554 return 0; | |
555 } |