changeset 6635:6acaa638fa07

Events: support for EPOLLEXCLUSIVE. This flag appeared in Linux 4.5 and is useful for avoiding thundering herd problem. The current Linux kernel implementation walks the list of exclusive waiters, and queues an event to each epfd, until it finds the first waiter that has threads blocked on it via epoll_wait().
author Valentin Bartenev <vbart@nginx.com>
date Fri, 15 Jul 2016 15:18:57 +0300
parents 18f6120e3b7a
children ea284434db0f
files auto/modules auto/os/linux src/event/modules/ngx_epoll_module.c src/event/ngx_event.c src/event/ngx_event.h
diffstat 5 files changed, 59 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/auto/modules	Fri Jul 15 15:18:57 2016 +0300
+++ b/auto/modules	Fri Jul 15 15:18:57 2016 +0300
@@ -43,6 +43,7 @@
 if [ $NGX_TEST_BUILD_EPOLL = YES ]; then
     have=NGX_HAVE_EPOLL . auto/have
     have=NGX_HAVE_EPOLLRDHUP . auto/have
+    have=NGX_HAVE_EPOLLEXCLUSIVE . auto/have
     have=NGX_HAVE_EVENTFD . auto/have
     have=NGX_TEST_BUILD_EPOLL . auto/have
     EVENT_MODULES="$EVENT_MODULES $EPOLL_MODULE"
--- a/auto/os/linux	Fri Jul 15 15:18:57 2016 +0300
+++ b/auto/os/linux	Fri Jul 15 15:18:57 2016 +0300
@@ -70,6 +70,22 @@
                       ee.data.ptr = NULL;
                       epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ee)"
     . auto/feature
+
+
+    # EPOLLEXCLUSIVE appeared in Linux 4.5, glibc 2.24
+
+    ngx_feature="EPOLLEXCLUSIVE"
+    ngx_feature_name="NGX_HAVE_EPOLLEXCLUSIVE"
+    ngx_feature_run=no
+    ngx_feature_incs="#include <sys/epoll.h>"
+    ngx_feature_path=
+    ngx_feature_libs=
+    ngx_feature_test="int efd = 0, fd = 0;
+                      struct epoll_event ee;
+                      ee.events = EPOLLIN|EPOLLEXCLUSIVE;
+                      ee.data.ptr = NULL;
+                      epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ee)"
+    . auto/feature
 fi
 
 
--- a/src/event/modules/ngx_epoll_module.c	Fri Jul 15 15:18:57 2016 +0300
+++ b/src/event/modules/ngx_epoll_module.c	Fri Jul 15 15:18:57 2016 +0300
@@ -27,6 +27,7 @@
 
 #define EPOLLRDHUP     0x2000
 
+#define EPOLLEXCLUSIVE 0x10000000
 #define EPOLLONESHOT   0x40000000
 #define EPOLLET        0x80000000
 
@@ -610,6 +611,12 @@
         op = EPOLL_CTL_ADD;
     }
 
+#if (NGX_HAVE_EPOLLEXCLUSIVE && NGX_HAVE_EPOLLRDHUP)
+    if (flags & NGX_EXCLUSIVE_EVENT) {
+        events &= ~EPOLLRDHUP;
+    }
+#endif
+
     ee.events = events | (uint32_t) flags;
     ee.data.ptr = (void *) ((uintptr_t) c | ev->instance);
 
--- a/src/event/ngx_event.c	Fri Jul 15 15:18:57 2016 +0300
+++ b/src/event/ngx_event.c	Fri Jul 15 15:18:57 2016 +0300
@@ -822,15 +822,38 @@
         rev->handler = (c->type == SOCK_STREAM) ? ngx_event_accept
                                                 : ngx_event_recvmsg;
 
-        if (ngx_use_accept_mutex
 #if (NGX_HAVE_REUSEPORT)
-            && !ls[i].reuseport
+
+        if (ls[i].reuseport) {
+            if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
+                return NGX_ERROR;
+            }
+
+            continue;
+        }
+
 #endif
-           )
-        {
+
+        if (ngx_use_accept_mutex) {
             continue;
         }
 
+#if (NGX_HAVE_EPOLLEXCLUSIVE)
+
+        if ((ngx_event_flags & NGX_USE_EPOLL_EVENT)
+            && ccf->worker_processes > 1)
+        {
+            if (ngx_add_event(rev, NGX_READ_EVENT, NGX_EXCLUSIVE_EVENT)
+                == NGX_ERROR)
+            {
+                return NGX_ERROR;
+            }
+
+            continue;
+        }
+
+#endif
+
         if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
             return NGX_ERROR;
         }
--- a/src/event/ngx_event.h	Fri Jul 15 15:18:57 2016 +0300
+++ b/src/event/ngx_event.h	Fri Jul 15 15:18:57 2016 +0300
@@ -367,6 +367,9 @@
 #define NGX_ONESHOT_EVENT  EPOLLONESHOT
 #endif
 
+#if (NGX_HAVE_EPOLLEXCLUSIVE)
+#define NGX_EXCLUSIVE_EVENT  EPOLLEXCLUSIVE
+#endif
 
 #elif (NGX_HAVE_POLL)
 
@@ -395,6 +398,11 @@
 #endif
 
 
+#if (NGX_TEST_BUILD_EPOLL)
+#define NGX_EXCLUSIVE_EVENT  0
+#endif
+
+
 #ifndef NGX_CLEAR_EVENT
 #define NGX_CLEAR_EVENT    0    /* dummy declaration */
 #endif