changeset 6536:f7849bfb6d21

Improved EPOLLRDHUP handling. When it's known that the kernel supports EPOLLRDHUP, there is no need for an additional recv() call to get EOF or an error when the flag is absent from the event generated by the kernel. A special runtime test is done at startup to detect whether EPOLLRDHUP is actually supported by the kernel, because epoll_ctl() silently ignores unknown flags. With this knowledge it's now possible to drop the "ready" flag after a partial read. Previously, the "ready" flag was kept until recv() returned EOF or an error. In particular, this change allows the lingering close heuristics (which rely on the "ready" flag state) to actually work on Linux and not wait for more data in most cases. The "available" flag is now used in the read event with semantics similar to those of the corresponding counter in kqueue.
author Valentin Bartenev <vbart@nginx.com>
date Fri, 13 May 2016 17:19:23 +0300
parents db699978a33f
children 3ad1064a3aae
files src/event/modules/ngx_epoll_module.c src/event/ngx_event.h src/http/ngx_http_request.c src/http/ngx_http_upstream.c src/os/unix/ngx_readv_chain.c src/os/unix/ngx_recv.c
diffstat 6 files changed, 153 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/src/event/modules/ngx_epoll_module.c	Thu May 12 16:43:19 2016 +0300
+++ b/src/event/modules/ngx_epoll_module.c	Fri May 13 17:19:23 2016 +0300
@@ -123,6 +123,7 @@
 static void ngx_epoll_eventfd_handler(ngx_event_t *ev);
 #endif
 
+static ngx_int_t ngx_epoll_module_init(ngx_cycle_t *cycle);
 static void *ngx_epoll_create_conf(ngx_cycle_t *cycle);
 static char *ngx_epoll_init_conf(ngx_cycle_t *cycle, void *conf);
 
@@ -146,6 +147,10 @@
 
 #endif
 
+#if (NGX_HAVE_EPOLLRDHUP)
+ngx_uint_t                  ngx_use_epoll_rdhup;
+#endif
+
 static ngx_str_t      epoll_name = ngx_string("epoll");
 
 static ngx_command_t  ngx_epoll_commands[] = {
@@ -197,7 +202,7 @@
     ngx_epoll_commands,                  /* module directives */
     NGX_EVENT_MODULE,                    /* module type */
     NULL,                                /* init master */
-    NULL,                                /* init module */
+    ngx_epoll_module_init,               /* init module */
     NULL,                                /* init process */
     NULL,                                /* init thread */
     NULL,                                /* exit thread */
@@ -808,6 +813,8 @@
             if (revents & EPOLLRDHUP) {
                 rev->pending_eof = 1;
             }
+
+            rev->available = 1;
 #endif
 
             rev->ready = 1;
@@ -943,6 +950,106 @@
 #endif
 
 
+/*
+ * The module init hook: detects at run time whether the kernel really
+ * reports EPOLLRDHUP and stores the result in ngx_use_epoll_rdhup.
+ * Returns NGX_OK on success (including a probe timeout, which leaves
+ * ngx_use_epoll_rdhup unchanged) and NGX_ERROR if a system call fails.
+ */
+static ngx_int_t
+ngx_epoll_module_init(ngx_cycle_t *cycle)
+{
+#if (NGX_HAVE_EPOLLRDHUP)
+    int                 epfd, s[2], events;
+    ngx_int_t           rc;
+    struct epoll_event  ee;
+
+    /*
+     * epoll_ctl() silently ignores unknown flags, so one end of a socket
+     * pair is registered with EPOLLRDHUP, the other end is closed, and
+     * the reported event is checked for the flag
+     */
+
+    epfd = epoll_create(1);
+
+    if (epfd == -1) {
+        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
+                      "epoll_create() failed");
+        return NGX_ERROR;
+    }
+
+    if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) == -1) {
+        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
+                      "socketpair() failed");
+        (void) close(epfd);
+        return NGX_ERROR;
+    }
+
+    /* from here on every exit goes through "done" to release descriptors */
+
+    rc = NGX_ERROR;
+
+    ee.events = EPOLLET|EPOLLIN|EPOLLRDHUP;
+
+    if (epoll_ctl(epfd, EPOLL_CTL_ADD, s[0], &ee) == -1) {
+        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
+                      "epoll_ctl() failed");
+        goto done;
+    }
+
+    if (close(s[1]) == -1) {
+        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
+                      "close() failed");
+
+        /* do not close the descriptor a second time during cleanup */
+        s[1] = -1;
+        goto done;
+    }
+
+    s[1] = -1;
+
+    events = epoll_wait(epfd, &ee, 1, 5000);
+
+    if (events == -1) {
+        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
+                      "epoll_wait() failed");
+        goto done;
+    }
+
+    if (events) {
+        ngx_use_epoll_rdhup = ee.events & EPOLLRDHUP;
+
+    } else {
+        /* a timeout is not a syscall failure, so errno is meaningless */
+        ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
+                      "epoll_wait() timed out");
+    }
+
+    ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0,
+                  "testing the EPOLLRDHUP flag: %s",
+                  ngx_use_epoll_rdhup ? "success" : "fail");
+
+    rc = NGX_OK;
+
+done:
+
+    if (s[1] != -1) {
+        (void) close(s[1]);
+    }
+
+    (void) close(s[0]);
+    (void) close(epfd);
+
+    return rc;
+
+#else
+
+    return NGX_OK;
+
+#endif
+}
+
+
 static void *
 ngx_epoll_create_conf(ngx_cycle_t *cycle)
 {
--- a/src/event/ngx_event.h	Thu May 12 16:43:19 2016 +0300
+++ b/src/event/ngx_event.h	Fri May 13 17:19:23 2016 +0300
@@ -96,6 +96,10 @@
      *   write:      available space in buffer when event is ready
      *               or lowat when event is set with NGX_LOWAT_EVENT flag
      *
+     * epoll with EPOLLRDHUP:
+     *   accept:     1 if accept many, 0 otherwise
+     *   read:       1 if there can be data to read, 0 otherwise
+     *
      * iocp: TODO
      *
      * otherwise:
@@ -196,6 +200,9 @@
 
 
 extern ngx_event_actions_t   ngx_event_actions;
+#if (NGX_HAVE_EPOLLRDHUP)
+extern ngx_uint_t            ngx_use_epoll_rdhup;
+#endif
 
 
 /*
--- a/src/http/ngx_http_request.c	Thu May 12 16:43:19 2016 +0300
+++ b/src/http/ngx_http_request.c	Fri May 13 17:19:23 2016 +0300
@@ -2752,9 +2752,13 @@
 
 #if (NGX_HAVE_EPOLLRDHUP)
 
-    if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) && rev->pending_eof) {
+    if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) && ngx_use_epoll_rdhup) {
         socklen_t  len;
 
+        if (!rev->pending_eof) {
+            return;
+        }
+
         rev->eof = 1;
         c->error = 1;
 
--- a/src/http/ngx_http_upstream.c	Thu May 12 16:43:19 2016 +0300
+++ b/src/http/ngx_http_upstream.c	Fri May 13 17:19:23 2016 +0300
@@ -1222,9 +1222,13 @@
 
 #if (NGX_HAVE_EPOLLRDHUP)
 
-    if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) && ev->pending_eof) {
+    if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) && ngx_use_epoll_rdhup) {
         socklen_t  len;
 
+        if (!ev->pending_eof) {
+            return;
+        }
+
         ev->eof = 1;
         c->error = 1;
 
--- a/src/os/unix/ngx_readv_chain.c	Thu May 12 16:43:19 2016 +0300
+++ b/src/os/unix/ngx_readv_chain.c	Fri May 13 17:19:23 2016 +0300
@@ -53,6 +53,20 @@
 
 #endif
 
+#if (NGX_HAVE_EPOLLRDHUP)
+
+    if (ngx_event_flags & NGX_USE_EPOLL_EVENT) {
+        ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                       "readv: eof:%d, avail:%d",
+                       rev->pending_eof, rev->available);
+
+        if (!rev->available && !rev->pending_eof) {
+            return NGX_AGAIN;
+        }
+    }
+
+#endif
+
     prev = NULL;
     iov = NULL;
     size = 0;
@@ -151,6 +165,24 @@
 
 #endif
 
+#if (NGX_HAVE_EPOLLRDHUP)
+
+            if ((ngx_event_flags & NGX_USE_EPOLL_EVENT)
+                && ngx_use_epoll_rdhup)
+            {
+                if (n < size) {
+                    if (!rev->pending_eof) {
+                        rev->ready = 0;
+                    }
+
+                    rev->available = 0;
+                }
+
+                return n;
+            }
+
+#endif
+
             if (n < size && !(ngx_event_flags & NGX_USE_GREEDY_EVENT)) {
                 rev->ready = 0;
             }
--- a/src/os/unix/ngx_recv.c	Thu May 12 16:43:19 2016 +0300
+++ b/src/os/unix/ngx_recv.c	Fri May 13 17:19:23 2016 +0300
@@ -50,6 +50,21 @@
 
 #endif
 
+#if (NGX_HAVE_EPOLLRDHUP)
+
+    if (ngx_event_flags & NGX_USE_EPOLL_EVENT) {
+        ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                       "recv: eof:%d, avail:%d",
+                       rev->pending_eof, rev->available);
+
+        if (!rev->available && !rev->pending_eof) {
+            rev->ready = 0;
+            return NGX_AGAIN;
+        }
+    }
+
+#endif
+
     do {
         n = recv(c->fd, buf, size, 0);
 
@@ -101,6 +116,24 @@
 
 #endif
 
+#if (NGX_HAVE_EPOLLRDHUP)
+
+            if ((ngx_event_flags & NGX_USE_EPOLL_EVENT)
+                && ngx_use_epoll_rdhup)
+            {
+                if ((size_t) n < size) {
+                    if (!rev->pending_eof) {
+                        rev->ready = 0;
+                    }
+
+                    rev->available = 0;
+                }
+
+                return n;
+            }
+
+#endif
+
             if ((size_t) n < size
                 && !(ngx_event_flags & NGX_USE_GREEDY_EVENT))
             {