Hi,

Attached is the *rough* patch which uses transient buckets in mod_sed output
filter.

Testing :
  I created a 30MB and 300MB text files and ran OutputSed commands on the file.
* Before the patch, process size (worker mpm with 1 thread) increased up to 
300M 
for single request.  After the  patch, process size remains to 3MB to server
300M response output.

I also removed 1 extra copying for processing output.

I need to add some more error handling to finalize the patch. Any comments are
welcome.

Regards,
Basant.

On Thu, Sep 04, 2008 at 09:47:26PM -0500, William A. Rowe, Jr. wrote:
> Basant Kukreja wrote:
>>
>> Based on your suggestion, I will check what are the other improvements from
>> mod_substitute can be brought into mod_sed.
>
> Note that mod_substitute's brigade handling is already based on the work of
> both Jim and Nick (author of mod_line_edit) - so they are pretty certain
> that it is the right approach.  Good idea to borrow from it.
>
> Bill
Index: modules/filters/mod_sed.c
===================================================================
--- modules/filters/mod_sed.c   (revision 692768)
+++ modules/filters/mod_sed.c   (working copy)
@@ -26,7 +26,8 @@
 #include "libsed.h"
 
 static const char *sed_filter_name = "Sed";
-#define MODSED_OUTBUF_SIZE 4000
+#define MODSED_OUTBUF_SIZE 8000
+#define MAX_TRANSIENT_BUCKETS 50
 
 typedef struct sed_expr_config
 {
@@ -44,11 +45,14 @@
 typedef struct sed_filter_ctxt
 {
     sed_eval_t eval;
+    ap_filter_t *f;
     request_rec *r;
     apr_bucket_brigade *bb;
     char *outbuf;
     char *curoutbuf;
     int bufsize;
+    apr_pool_t *tpool;
+    int numbuckets;
 } sed_filter_ctxt;
 
 module AP_MODULE_DECLARE_DATA sed_module;
@@ -71,29 +75,68 @@
     sed_cfg->last_error = error;
 }
 
+/* clear the temporary pool (used for transient buckets)
+ */
+static void clear_ctxpool(sed_filter_ctxt* ctx)
+{
+    apr_pool_clear(ctx->tpool);
+    ctx->outbuf = NULL;
+    ctx->curoutbuf = NULL;
+    ctx->numbuckets = 0;
+}
+
+/* alloc_outbuf
+ * allocate output buffer
+ */
+static void alloc_outbuf(sed_filter_ctxt* ctx)
+{
+    ctx->outbuf = apr_palloc(ctx->tpool, ctx->bufsize + 1);
+    ctx->curoutbuf = ctx->outbuf;
+}
+
+/* append_bucket
+ * Allocate a new bucket from buf and sz and append to ctx->bb
+ */
+static void append_bucket(sed_filter_ctxt* ctx, char* buf, int sz)
+{
+    int rv;
+    apr_bucket *b;
+    if (ctx->tpool == ctx->r->pool) {
+        /* We are not using transient bucket */
+        b = apr_bucket_pool_create(buf, sz, ctx->r->pool,
+                                   ctx->r->connection->bucket_alloc);
+        APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
+    }
+    else {
+        /* We are using transient bucket */
+        b = apr_bucket_transient_create(buf, sz,
+                                        ctx->r->connection->bucket_alloc);
+        APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
+        ctx->numbuckets++;
+        if (ctx->numbuckets >= MAX_TRANSIENT_BUCKETS) {
+            b = apr_bucket_flush_create(ctx->r->connection->bucket_alloc);
+            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
+            rv = ap_pass_brigade(ctx->f->next, ctx->bb);
+            apr_brigade_cleanup(ctx->bb);
+            clear_ctxpool(ctx);
+        }
+    }
+}
+
 /*
  * flush_output_buffer
  * Flush the  output data (stored in ctx->outbuf)
  */
-static void flush_output_buffer(sed_filter_ctxt *ctx, char* buf, int sz)
+static void flush_output_buffer(sed_filter_ctxt *ctx)
 {
     int size = ctx->curoutbuf - ctx->outbuf;
     char *out;
-    apr_bucket *b;
-    if (size + sz <= 0)
+    if ((ctx->outbuf == NULL) || (size <=0))
         return;
-    out = apr_palloc(ctx->r->pool, size + sz);
-    if (size) {
-        memcpy(out, ctx->outbuf, size);
-    }
-    if (buf && (sz > 0)) {
-        memcpy(out + size, buf, sz);
-    }
-    /* Reset the output buffer position */
+    out = apr_palloc(ctx->tpool, size);
+    memcpy(out, ctx->outbuf, size);
+    append_bucket(ctx, out, size);
     ctx->curoutbuf = ctx->outbuf;
-    b = apr_bucket_pool_create(out, size + sz, ctx->r->pool,
-                               ctx->r->connection->bucket_alloc);
-    APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
 }
 
 /* This is a call back function. When libsed wants to generate the output,
@@ -104,11 +147,38 @@
     /* dummy is basically filter context. Context is passed during invocation
      * of sed_eval_buffer
      */
+    int remainbytes = 0;
     sed_filter_ctxt *ctx = (sed_filter_ctxt *) dummy;
-    if (((ctx->curoutbuf - ctx->outbuf) + sz) >= ctx->bufsize) {
-        /* flush current buffer */
-        flush_output_buffer(ctx, buf, sz);
+    if (ctx->outbuf == NULL) {
+        alloc_outbuf(ctx);
     }
+    remainbytes = ctx->bufsize - (ctx->curoutbuf - ctx->outbuf);
+    if (sz >= remainbytes) {
+        if (remainbytes > 0) {
+            memcpy(ctx->curoutbuf, buf, remainbytes);
+            buf += remainbytes;
+            sz -= remainbytes;
+            ctx->curoutbuf += remainbytes;
+        }
+        /* buffer is now full */
+        append_bucket(ctx, ctx->outbuf, ctx->bufsize);
+        /* old buffer is now used so allocate new buffer */
+        alloc_outbuf(ctx);
+        /* if size is bigger than the allocated buffer directly add to output 
brigade */
+        if (sz >= ctx->bufsize) {
+            char* newbuf = apr_palloc(ctx->tpool, sz);
+            memcpy(newbuf, buf, sz);
+            append_bucket(ctx, newbuf, sz);
+            /* pool might get clear after append_bucket */
+            if (ctx->outbuf == NULL) {
+                alloc_outbuf(ctx);
+            }
+        }
+        else {
+            memcpy(ctx->curoutbuf, buf, sz);
+            ctx->curoutbuf += sz;
+        }
+    }
     else {
         memcpy(ctx->curoutbuf, buf, sz);
         ctx->curoutbuf += sz;
@@ -153,10 +223,11 @@
 
 /* Initialize sed filter context. If successful then context is set in f->ctx
  */
-static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg)
+static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg, int 
usetpool)
 {
     apr_status_t status;
     sed_filter_ctxt* ctx;
+    apr_pool_t *tpool;
     request_rec *r = f->r;
     /* Create the context. Call sed_init_eval. libsed will generated
      * output by calling sed_write_output and generates any error by
@@ -165,6 +236,8 @@
     ctx = apr_pcalloc(r->pool, sizeof(sed_filter_ctxt));
     ctx->r = r;
     ctx->bb = NULL;
+    ctx->numbuckets = 0;
+    ctx->f = f;
     status = sed_init_eval(&ctx->eval, sed_cfg->sed_cmds, log_sed_errf,
                            r, &sed_write_output, r->pool);
     if (status != APR_SUCCESS) {
@@ -173,8 +246,13 @@
     apr_pool_cleanup_register(r->pool, &ctx->eval, sed_eval_cleanup,
                               apr_pool_cleanup_null);
     ctx->bufsize = MODSED_OUTBUF_SIZE;
-    ctx->outbuf = apr_palloc(r->pool, ctx->bufsize + 1);
-    ctx->curoutbuf = ctx->outbuf;
+    if (usetpool) {
+        apr_pool_create(&(ctx->tpool), r->pool);
+    }
+    else {
+        ctx->tpool = r->pool;
+    }
+    alloc_outbuf(ctx);
     f->ctx = ctx;
     return APR_SUCCESS;
 }
@@ -204,10 +282,11 @@
             return ap_pass_brigade(f->next, bb);
         }
 
-        status = init_context(f, sed_cfg);
+        status = init_context(f, sed_cfg, 1);
         if (status != APR_SUCCESS)
              return status;
         ctx = f->ctx;
+        apr_table_unset(f->r->headers_out, "Content-Length");
     }
 
     ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
@@ -239,7 +318,7 @@
             apr_bucket *b1 = APR_BUCKET_NEXT(b);
             /* Now clean up the internal sed buffer */
             sed_finalize_eval(&ctx->eval, ctx);
-            flush_output_buffer(ctx, NULL, 0);
+            flush_output_buffer(ctx);
             APR_BUCKET_REMOVE(b);
             /* Insert the eos bucket to ctx->bb brigade */
             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
@@ -248,12 +327,8 @@
         else if (APR_BUCKET_IS_FLUSH(b)) {
             apr_bucket *b1 = APR_BUCKET_NEXT(b);
             APR_BUCKET_REMOVE(b);
+            flush_output_buffer(ctx);
             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
-            status = ap_pass_brigade(f->next, ctx->bb);
-            apr_brigade_cleanup(ctx->bb);
-            if (status != APR_SUCCESS) {
-                return status;
-            }
             b = b1;
         }
         else if (APR_BUCKET_IS_METADATA(b)) {
@@ -264,9 +339,9 @@
             apr_bucket *b1 = APR_BUCKET_NEXT(b);
             status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
             if (status != APR_SUCCESS) {
+                clear_ctxpool(ctx);
                 return status;
             }
-            flush_output_buffer(ctx, NULL, 0);
             APR_BUCKET_REMOVE(b);
             apr_bucket_delete(b);
             b = b1;
@@ -278,7 +353,14 @@
         }
     }
     apr_brigade_cleanup(bb);
-    return ap_pass_brigade(f->next, ctx->bb);
+    flush_output_buffer(ctx);
+    status = APR_SUCCESS;
+    if (!APR_BRIGADE_EMPTY(ctx->bb)) {
+        status = ap_pass_brigade(f->next, ctx->bb);
+        apr_brigade_cleanup(ctx->bb);
+    }
+    clear_ctxpool(ctx);
+    return status;
 }
 
 /* Entry function for Sed input filter */
@@ -309,7 +391,7 @@
             /* XXX : Should we filter the sub requests too */
             return ap_get_brigade(f->next, bb, mode, block, readbytes);
         }
-        status = init_context(f, sed_cfg);
+        status = init_context(f, sed_cfg, 0);
         if (status != APR_SUCCESS)
              return status;
         ctx = f->ctx;
@@ -352,7 +434,7 @@
             if (APR_BUCKET_IS_EOS(b)) {
                 /* eos bucket. Clear the internal sed buffers */
                 sed_finalize_eval(&ctx->eval, ctx);
-                flush_output_buffer(ctx, NULL, 0);
+                flush_output_buffer(ctx);
                 APR_BUCKET_REMOVE(b);
                 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
                 break;
@@ -366,7 +448,7 @@
                 status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
                 if (status != APR_SUCCESS)
                     return status;
-                flush_output_buffer(ctx, NULL, 0);
+                flush_output_buffer(ctx);
             }
         }
         apr_brigade_cleanup(bbinp);

Reply via email to