> Furthermore from a first glance of mod_sed I would guess that it runs into
> trouble with large static files. So could you test mod_sed with one of
> the simple cases above and a 3 GB file and monitor httpd's memory consumption
> in comparison to mod_substitute?
I did testing with large files. Memory consumption was not a problem with large
files but response time was a big problem.
The performance of mod_sed was terrible with large files. I was not buffering
the output data in the sed code, which caused too many buckets to be created.
This didn't cause much of a performance drop for small files, but for big files
it made a large difference. I introduced a 4K buffer and that solved the problem.

For a 3M file :
download time : 255 seconds
After my changes :
download time : 0.5 seconds

I also tried with 300M file and response time/memory consumption was not bad.

I have revised the mod_sed code; the new code can be retrieved from Mercurial:
$ hg clone ssh://[EMAIL PROTECTED]/hg/webstack/mod_sed

It should soon be visible on http url too.
http://src.opensolaris.org/source/xref/webstack/mod_sed/

Diffs are attached at the bottom. With these new changes here are the
performance comparison results. The changes improved performance in general.

Performance result with changes in mod_sed and trunk version of mod_substitute.
Apache : 2.2.6 64 bit worker mpm.
OS : Solaris snv_79
---------------------------------------------------------------------------------
test2.html : 500 substitution s/hello/hi/g.
Requests Per Second (RPS) as measured by ab for s/hello/hi/ substitution.
Enabled CPUs : 4
testname       Concur  MaxReq  mod_sed    mod_line_edit   mod_substitute  mod_substitute
                                RPS         RPS               RPS         (with n flag) RPS
test2.html       400   400000   2788         1363              2557             2811

Summary :
On simple string substitution (s/hello/hi/), mod_sed performs better than
mod_substitute without the [n] parameter and is close to mod_substitute with
the [n] parameter.
---------------------------------------------------------------------------------
Test case summary : test for simple regular expression.
Regular expression used : OutputSed 's/.*one.*two/1 2/'
                        : Substitute "s/.*one.*two/1 2/"
String : hello one 1 two 2 three 3 four 4 five 5.
Enabled CPUs : 4
test22.html : 500 lines containing the above string (resulting in 500 substitutions).
testname       Concur  MaxReq  mod_sed    mod_substitute  
                                RPS         RPS          
test22.html     400    400000   2168        1688
Summary :
With ordinary regular expression, mod_sed performed better than
mod_substitute.
---------------------------------------------------------------------------------
Test case summary : pattern capture
Regular expression used : OutputSed 's/.*\(one\).*\(two\).*\(three\).*\(four\).*\(five\).*/\5 \4 \3 \2 \1/'
                        : Substitute "s/.*(one).*(two).*(three).*(four).*(five).*/$5 $4 $3 $2 $1/"
String : hello one 1 two 2 three 3 four 4 five 5.
Enabled CPUs : 4
test12.html : 500 lines containing the above string (resulting in 500 substitutions).
testname       Concur  MaxReq  mod_sed    mod_substitute  
                                RPS         RPS          
test12.html     400    400000   1523        3135
Summary :
With pattern capture, mod_substitute performed much better than mod_sed.
---------------------------------------------------------------------------------

Regards,
Basant.

diff -r b16bfdb8759b libsed.h
--- a/libsed.h  Fri Apr 11 15:00:36 2008 -0700
+++ b/libsed.h  Fri Apr 11 15:04:44 2008 -0700
@@ -133,6 +133,10 @@ struct sed_eval_s {
     char           *gbend;
     char           *lcomend;
 
+    char           *outbuf;
+    char           *curoutbuf;
+    char           *outbend;
+
     apr_file_t    *fcode[NWFILES];
     sed_reptr_t    *abuf[SED_ABUFSIZE];
     sed_reptr_t    **aptr;
diff -r b16bfdb8759b sed1.c
--- a/sed1.c    Fri Apr 11 15:00:36 2008 -0700
+++ b/sed1.c    Fri Apr 11 15:04:37 2008 -0700
@@ -73,6 +73,8 @@ static apr_status_t command(sed_eval_t *
                             step_vars_storage *step_vars);
 static void wline(sed_eval_t *eval, char *buf, int sz);
 static void arout(sed_eval_t *eval);
+static void sed_write(sed_eval_t *eval, char *buf, int sz);
+static void sed_flush_output_buffer(sed_eval_t *eval, char* buf, int sz);
 
 static void eval_errf(sed_eval_t *eval, const char *fmt, ...)
 {
@@ -124,6 +126,13 @@ apr_status_t sed_reset_eval(sed_eval_t *
         eval->genbuf = apr_palloc(eval->pool, LBSIZE + 1);
         eval->gbend = eval->genbuf + LBSIZE;
     }
+    if (eval->outbuf == NULL) {
+        int bufsize = 4 * LBSIZE;
+        eval->outbuf = apr_palloc(eval->pool, bufsize + 1);
+        eval->outbend = eval->outbuf + bufsize;
+    }
+    eval->curoutbuf = eval->outbuf;
+
     eval->lspend = eval->linebuf;
     eval->hspend = eval->holdbuf;
     eval->lcomend = &eval->genbuf[71];
@@ -253,6 +262,7 @@ apr_status_t sed_eval_buffer(sed_eval_t 
             break;
     }
 
+    sed_flush_output_buffer(eval, NULL, 0);
     /* Save the leftovers for later */
     if (bufsz) {
         if ((eval->linebuf + LBSIZE) <= eval->lspend + bufsz) {
@@ -298,6 +308,7 @@ apr_status_t sed_finalize_eval(sed_eval_
         if (rv != 0)
             return APR_EGENERAL;
     }
+    sed_flush_output_buffer(eval, NULL, 0);
 
     eval->quitflag = 1;
 
@@ -840,7 +851,7 @@ static void arout(sed_eval_t *eval)
             while ((apr_file_read(fi, buf, &n)) == APR_SUCCESS) {
                 if (n == 0)
                     break;
-                eval->writefn(eval->fout, apr_pmemdup(eval->pool, buf, n), n);
+                sed_write(eval, buf, n);
                 n = sizeof(buf);
             }
             apr_file_close(fi);
@@ -851,13 +862,48 @@ static void arout(sed_eval_t *eval)
 }
 
 /*
+ * sed_flush_output_buffer
+ * Flush the  output data (stored in eval->outbuf)
+ */
+static void sed_flush_output_buffer(sed_eval_t *eval, char* buf, int sz)
+{
+    int size = eval->curoutbuf - eval->outbuf;
+    char *out;
+    if (size + sz <= 0)
+        return;
+    out = apr_palloc(eval->pool, size + sz);
+    if (size) {
+        memcpy(out, eval->outbuf, size);
+    }
+    if (buf && (sz > 0)) {
+        memcpy(out + size, buf, sz);
+    }
+    eval->writefn(eval->fout, out, size + sz);
+    /* Reset the output buffer position */
+    eval->curoutbuf = eval->outbuf;
+}
+
+/*
+ * sed_write
+ */
+static void sed_write(sed_eval_t *eval, char *buf, int sz)
+{
+    if (eval->curoutbuf + sz >= eval->outbend) {
+        // flush current buffer
+        sed_flush_output_buffer(eval, buf, sz);
+    }
+    else {
+        memcpy(eval->curoutbuf, buf, sz);
+        eval->curoutbuf += sz;
+    }
+}
+
+/*
  * wline
  */
 static void wline(sed_eval_t *eval, char *buf, int sz)
 {
-    char *out = apr_palloc(eval->pool, sz + 1);
-    memcpy(out, buf, sz);
-    *(out + sz) = '\n';
-    eval->writefn(eval->fout, out, sz + 1);
-}
-
+    sed_write(eval, buf, sz);
+    sed_write(eval, "\n", 1);
+}
+

Reply via email to