changeset 6605:f379b32e4733

Sub filter: eliminate unnecessary buffering. Previously, when a buffer was processed by the sub filter, its final bytes could be buffered by the filter even if they don't match any pattern. This happened because the Boyer-Moore algorithm, employed by the sub filter since b9447fc457b4 (1.9.4), matches the last characters of patterns prior to checking other characters. If the last character is out of scope, initial bytes of a potential match are buffered until the last character is available. Now, after receiving a flush or recycled buffer, the filter performs additional checks to reduce the number of buffered bytes. The potential match is checked against the initial parts of all patterns. Non-matching bytes are not buffered. This improves processing of a chunked response from upstream by sending the entire chunks without buffering unless a partial match is found at the end of a chunk.
author Roman Arutyunyan <arut@nginx.com>
date Sat, 02 Jul 2016 15:59:53 +0300
parents 0e0adbbc8752
children 2f41d383c9c7
files src/http/modules/ngx_http_sub_filter_module.c
diffstat 1 files changed, 37 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/src/http/modules/ngx_http_sub_filter_module.c	Sat Jul 02 15:59:52 2016 +0300
+++ b/src/http/modules/ngx_http_sub_filter_module.c	Sat Jul 02 15:59:53 2016 +0300
@@ -83,7 +83,7 @@
 static ngx_int_t ngx_http_sub_output(ngx_http_request_t *r,
     ngx_http_sub_ctx_t *ctx);
 static ngx_int_t ngx_http_sub_parse(ngx_http_request_t *r,
-    ngx_http_sub_ctx_t *ctx);
+    ngx_http_sub_ctx_t *ctx, ngx_uint_t flush);
 static ngx_int_t ngx_http_sub_match(ngx_http_sub_ctx_t *ctx, ngx_int_t start,
     ngx_str_t *m);
 
@@ -287,6 +287,7 @@
     ngx_int_t                  rc;
     ngx_buf_t                 *b;
     ngx_str_t                 *sub;
+    ngx_uint_t                 flush, last;
     ngx_chain_t               *cl;
     ngx_http_sub_ctx_t        *ctx;
     ngx_http_sub_match_t      *match;
@@ -328,6 +329,9 @@
     ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                    "http sub filter \"%V\"", &r->uri);
 
+    flush = 0;
+    last = 0;
+
     while (ctx->in || ctx->buf) {
 
         if (ctx->buf == NULL) {
@@ -336,11 +340,19 @@
             ctx->pos = ctx->buf->pos;
         }
 
+        if (ctx->buf->flush || ctx->buf->recycled) {
+            flush = 1;
+        }
+
+        if (ctx->in == NULL) {
+            last = flush;
+        }
+
         b = NULL;
 
         while (ctx->pos < ctx->buf->last) {
 
-            rc = ngx_http_sub_parse(r, ctx);
+            rc = ngx_http_sub_parse(r, ctx, last);
 
             ngx_log_debug4(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                            "parse: %i, looked: \"%V\" %p-%p",
@@ -592,7 +604,8 @@
 
 
 static ngx_int_t
-ngx_http_sub_parse(ngx_http_request_t *r, ngx_http_sub_ctx_t *ctx)
+ngx_http_sub_parse(ngx_http_request_t *r, ngx_http_sub_ctx_t *ctx,
+    ngx_uint_t flush)
 {
     u_char                   *p, c;
     ngx_str_t                *m;
@@ -604,6 +617,7 @@
 
     slcf = ngx_http_get_module_loc_conf(r, ngx_http_sub_filter_module);
     tables = ctx->tables;
+    match = ctx->matches->elts;
 
     offset = ctx->offset;
     end = ctx->buf->last - ctx->pos;
@@ -630,7 +644,6 @@
         /* a potential match */
 
         start = offset - (ngx_int_t) tables->min_match_len + 1;
-        match = ctx->matches->elts;
 
         i = ngx_max(tables->index[c], ctx->index);
         j = tables->index[c + 1];
@@ -671,6 +684,26 @@
         ctx->index = 0;
     }
 
+    if (flush) {
+        for ( ;; ) {
+            start = offset - (ngx_int_t) tables->min_match_len + 1;
+
+            if (start >= end) {
+                break;
+            }
+
+            for (i = 0; i < ctx->matches->nelts; i++) {
+                m = &match[i].match;
+
+                if (ngx_http_sub_match(ctx, start, m) == NGX_AGAIN) {
+                    goto again;
+                }
+            }
+
+            offset++;
+        }
+    }
+
 again:
 
     ctx->offset = offset;