> Furthermore from a first glance of mod_sed I would guess that it runs into
> trouble with large static files. So could you test mod_sed with one of
> the simple cases above and a 3 GB file and monitor httpd's memory consumption
> in comparison to mod_substitute?

I tested with large files. Memory consumption was not a problem with large files, but response time was a big problem. The performance of mod_sed was terrible with large files. I was not buffering the output data in the sed code, which caused too many buckets to be created. This didn't cause much of a performance drop for small files, but for big files it made a large difference. I introduced a 4K buffer and that solved the problem.
For a 3M file    : download time : 255 seconds
After my changes : download time : 0.5 seconds

I also tried with a 300M file, and response time/memory consumption was not bad.

I have revised the mod_sed code, and the new code can be retrieved from Mercurial:
$ hg clone ssh://[EMAIL PROTECTED]/hg/webstack/mod_sed
It should soon be visible at the HTTP URL too:
http://src.opensolaris.org/source/xref/webstack/mod_sed/
Diffs are attached at the bottom.

With these new changes, here are the performance comparison results. The changes improved performance in general.

Performance results with the changes in mod_sed and the trunk version of mod_substitute.
Apache : 2.2.6 64 bit worker mpm.
OS : Solaris snv_79
---------------------------------------------------------------------------------
test2.html : 500 substitutions s/hello/hi/g.
Requests Per Second (RPS) as measured by ab for s/hello/hi/ substitution.
Enabled CPUs : 4

testname    Concur  MaxReq  mod_sed  mod_line_edit  mod_substitute  mod_substitute
                            RPS      RPS            RPS             (with n flag) RPS
test2.html  400     400000  2788     1363           2557            2811

Summary : On simple string substitution (s/hello/hi/), mod_sed performs better than mod_substitute without the [n] parameter and is close to mod_substitute with the [n] parameter.
---------------------------------------------------------------------------------
Test case summary : test for simple regular expression.
Regular expression used : OutputSed 's/.*one.*two/1 2/' : Substitute "s/.*one.*two/1 2/"
String : hello one 1 two 2 three 3 four 4 five 5.
Enabled CPUs : 4
test22.html : 500 lines containing the above string (resulting in 500 substitutions).

testname     Concur  MaxReq  mod_sed  mod_substitute
                             RPS      RPS
test22.html  400     400000  2168     1688

Summary : With an ordinary regular expression, mod_sed performed better than mod_substitute.
---------------------------------------------------------------------------------
Test case summary : pattern capture
Regular expression used : OutputSed 's/.*\(one\).*\(two\).*\(three\).*\(four\).*\(five\).*/\5 \4 \3 \2 \1/' : Substitute "s/.*(one).*(two).*(three).*(four).*(five).*/$5 $4 $3 $2 $1/"
String : hello one 1 two 2 three 3 four 4 five 5.
Enabled CPUs : 4
test12.html : 500 lines containing the above string (resulting in 500 substitutions).

testname     Concur  MaxReq  mod_sed  mod_substitute
                             RPS      RPS
test12.html  400     400000  1523     3135

Summary : With pattern capture, mod_substitute performed much better than mod_sed.
---------------------------------------------------------------------------------

Regards,
Basant.

diff -r b16bfdb8759b libsed.h --- a/libsed.h Fri Apr 11 15:00:36 2008 -0700 +++ b/libsed.h Fri Apr 11 15:04:44 2008 -0700 @@ -133,6 +133,10 @@ struct sed_eval_s { char *gbend; char *lcomend; + char *outbuf; + char *curoutbuf; + char *outbend; + apr_file_t *fcode[NWFILES]; sed_reptr_t *abuf[SED_ABUFSIZE]; sed_reptr_t **aptr; diff -r b16bfdb8759b sed1.c --- a/sed1.c Fri Apr 11 15:00:36 2008 -0700 +++ b/sed1.c Fri Apr 11 15:04:37 2008 -0700 @@ -73,6 +73,8 @@ static apr_status_t command(sed_eval_t * step_vars_storage *step_vars); static void wline(sed_eval_t *eval, char *buf, int sz); static void arout(sed_eval_t *eval); +static void sed_write(sed_eval_t *eval, char *buf, int sz); +static void sed_flush_output_buffer(sed_eval_t *eval, char* buf, int sz); static void eval_errf(sed_eval_t *eval, const char *fmt, ...) 
{ @@ -124,6 +126,13 @@ apr_status_t sed_reset_eval(sed_eval_t * eval->genbuf = apr_palloc(eval->pool, LBSIZE + 1); eval->gbend = eval->genbuf + LBSIZE; } + if (eval->outbuf == NULL) { + int bufsize = 4 * LBSIZE; + eval->outbuf = apr_palloc(eval->pool, bufsize + 1); + eval->outbend = eval->outbuf + bufsize; + } + eval->curoutbuf = eval->outbuf; + eval->lspend = eval->linebuf; eval->hspend = eval->holdbuf; eval->lcomend = &eval->genbuf[71]; @@ -253,6 +262,7 @@ apr_status_t sed_eval_buffer(sed_eval_t break; } + sed_flush_output_buffer(eval, NULL, 0); /* Save the leftovers for later */ if (bufsz) { if ((eval->linebuf + LBSIZE) <= eval->lspend + bufsz) { @@ -298,6 +308,7 @@ apr_status_t sed_finalize_eval(sed_eval_ if (rv != 0) return APR_EGENERAL; } + sed_flush_output_buffer(eval, NULL, 0); eval->quitflag = 1; @@ -840,7 +851,7 @@ static void arout(sed_eval_t *eval) while ((apr_file_read(fi, buf, &n)) == APR_SUCCESS) { if (n == 0) break; - eval->writefn(eval->fout, apr_pmemdup(eval->pool, buf, n), n); + sed_write(eval, buf, n); n = sizeof(buf); } apr_file_close(fi); @@ -851,13 +862,48 @@ static void arout(sed_eval_t *eval) } /* + * sed_flush_output_buffer + * Flush the output data (stored in eval->outbuf) + */ +static void sed_flush_output_buffer(sed_eval_t *eval, char* buf, int sz) +{ + int size = eval->curoutbuf - eval->outbuf; + char *out; + if (size + sz <= 0) + return; + out = apr_palloc(eval->pool, size + sz); + if (size) { + memcpy(out, eval->outbuf, size); + } + if (buf && (sz > 0)) { + memcpy(out + size, buf, sz); + } + eval->writefn(eval->fout, out, size + sz); + /* Reset the output buffer position */ + eval->curoutbuf = eval->outbuf; +} + +/* + * sed_write + */ +static void sed_write(sed_eval_t *eval, char *buf, int sz) +{ + if (eval->curoutbuf + sz >= eval->outbend) { + // flush current buffer + sed_flush_output_buffer(eval, buf, sz); + } + else { + memcpy(eval->curoutbuf, buf, sz); + eval->curoutbuf += sz; + } +} + +/* * wline */ static void wline(sed_eval_t 
*eval, char *buf, int sz) { - char *out = apr_palloc(eval->pool, sz + 1); - memcpy(out, buf, sz); - *(out + sz) = '\n'; - eval->writefn(eval->fout, out, sz + 1); -} - + sed_write(eval, buf, sz); + sed_write(eval, "\n", 1); +} +