Doug MacEachern <[EMAIL PROTECTED]> writes:
> > my $args = $q->param; # hash ref
>
> you mean parms() ? the Apache::Request::parms hash ref is tied, so
> there are still method calls, but less than calling params(), which does
> extra stuff to emulate CGI::params.
I just looked at line 21 from Request.pm; it looks like $q->param() returns
the same thing as $q->parms does; but surely $q->parms is even better!
> parms() is going to be renamed (to something less like params()) and
> documented as faster than using the params() wrapper, in the next release.
A new libapreq release? Great news! Here's YAP for libapreq - I added Dave Mitchell's
memmem in multipart_buffer.c for better portability, and made some minor changes
to apache_request.c to eliminate some unnecessary copying. I'd be glad to send
you a url to a production server, if you'd like to see it in action.
HTH, and thanks again.
--
Joe Schaefer
[EMAIL PROTECTED]
SunStar Systems, Inc.
diff -ur libapreq-0.31/c/apache_request.c libapreq/c/apache_request.c
--- libapreq-0.31/c/apache_request.c Fri Jul 2 21:00:17 1999
+++ libapreq/c/apache_request.c Sun Sep 24 22:10:18 2000
@@ -64,8 +64,20 @@
for(x=0;str[x];x++) if(str[x] == '+') str[x] = ' ';
}
-static int util_read(ApacheRequest *req, const char **rbuf)
+static int util_read(ApacheRequest *req, char **rbuf)
{
+ /* could make pointer array (LoL) to capture growth
+ * p[1] -> [...........\0]
+ * p[2] -> [........\0]
+ * p[3] -> [......................\0]
+ *
+ * would need hi-tech flushing routine (per string)
+ * also need a hi-tech reader. is it really worth the trouble?
+ *
+ *
+ *
+ */
+
request_rec *r = req->r;
int rc = OK;
@@ -84,9 +96,9 @@
return HTTP_REQUEST_ENTITY_TOO_LARGE;
}
- *rbuf = ap_pcalloc(r->pool, length + 1);
+ *rbuf = ap_pcalloc(r->pool, length + 1); /* can we improve this? */
- ap_hard_timeout("[libapreq] util_read", r);
+ ap_hard_timeout("[libapreq] util_parse", r);
while ((len_read =
ap_get_client_block(r, buff, sizeof(buff))) > 0) {
@@ -234,56 +246,56 @@
return req;
}
-static int urlword_dlm[] = {'&', ';', 0};
+/* static int urlword_dlm[] = {'&', ';', 0}; */
-static char *my_urlword(pool *p, const char **line)
+char *my_urlword(ApacheRequest *req, char **line)
{
- int i;
-
- for (i = 0; urlword_dlm[i]; i++) {
- int stop = urlword_dlm[i];
- char *pos = strchr(*line, stop);
- char *res;
-
- if (!pos) {
- if (!urlword_dlm[i+1]) {
- int len = strlen(*line);
- res = ap_pstrndup(p, *line, len);
- *line += len;
- return res;
- }
- continue;
- }
+ char dlm[] = "&;";
+ long int len;
+ char *word, *dlm_ptr;
- res = ap_pstrndup(p, *line, pos - *line);
+ if (! *line) { return NULL; }
- while (*pos == stop) {
- ++pos;
- }
+ dlm_ptr = strpbrk(*line, dlm);
- *line = pos;
+ if (!dlm_ptr) {
+ word = *line;
+ *line = NULL;
+ return word;
- return res;
+ } else {
+ *dlm_ptr = '\0';
+ word = *line;
+ *line = dlm_ptr + 1;
+ return word;
}
-
- return NULL;
}
-static void split_to_parms(ApacheRequest *req, const char *data)
+static void split_to_parms(ApacheRequest *req, char *data)
{
request_rec *r = req->r;
- const char *val;
-
- while (*data && (val = my_urlword(r->pool, &data))) {
- const char *key = ap_getword(r->pool, &val, '=');
+ char dlm[] = "&;";
+ char *word;
+ char *val;
+
+ do {
+ word = my_urlword(req, &data); /* modifies data */
+
+ if (!(val = strchr( word, '='))) {
+ continue; /* ignores words w/o an = sign */
+ }
+
+ *val = '\0';
+ ++val;
- req_plustospace((char*)key);
- ap_unescape_url((char*)key);
req_plustospace((char*)val);
ap_unescape_url((char*)val);
+ req_plustospace((char*)word);
+ ap_unescape_url((char*)word);
+
+ ap_table_add(req->parms, word, val);
- ap_table_add(req->parms, key, val);
- }
+ } while ( data ) ;
}
@@ -293,7 +305,8 @@
int rc = OK;
if (r->method_number == M_POST) {
- const char *data, *type;
+ char *data = NULL;
+ const char *type;
type = ap_table_get(r->headers_in, "Content-Type");
@@ -304,12 +317,13 @@
return rc;
}
if (data) {
- split_to_parms(req, data);
+ split_to_parms(req, data);
}
+
}
if (r->args) {
- split_to_parms(req, r->args);
+ split_to_parms(req, ap_pstrdup(r->pool,r->args));
}
return OK;
@@ -439,7 +453,7 @@
}
if (r->args) {
- split_to_parms(req, r->args);
+ split_to_parms(req, ap_pstrdup(r->pool,r->args));
}
return OK;
diff -ur libapreq-0.31/c/multipart_buffer.c libapreq/c/multipart_buffer.c
--- libapreq-0.31/c/multipart_buffer.c Sat May 1 02:44:28 1999
+++ libapreq/c/multipart_buffer.c Fri Sep 22 02:14:25 2000
@@ -55,134 +55,180 @@
*
*/
+/* JS: 6/30/2000
+ * This should fix the memory allocation issues, and handle client disconnects
+ * gracefully. Comments in the code should explain what I think is going on.
+ */
+
+
#include "multipart_buffer.h"
-#define FILLUNIT (1024 * 5)
#ifndef CRLF
#define CRLF "\015\012"
#endif
#define CRLF_CRLF "\015\012\015\012"
+#define min(A, B) ((A) < (B) ? (A) : (B))
+
+#define DEBUG 0 /* 0 = off, 1 = noisy */
static char *my_join(pool *p, char *one, int lenone, char *two, int lentwo)
{
- char *res;
- int len = lenone+lentwo;
- res = (char *)ap_palloc(p, len + 1);
- memcpy(res, one, lenone);
+ char *res;
+ /* p is typically self->subp so we can free this at lines 229, 345 */
+ res = (char *)ap_palloc(p, lenone + lentwo + 1);
+ memcpy(res, one, lenone);
memcpy(res+lenone, two, lentwo);
+ *(res+lenone+lentwo+1) = '\0'; /* be nice to character strings */
return res;
}
-static char *
-my_ninstr(register char *big, register char *bigend, char *little, char *lend)
-{
- register char *s, *x;
- register int first = *little;
- register char *littleend = lend;
+/* Dave Mitchell's memmem */
+
+void *my_memmem( const void*buf, size_t buf_len, const void *s, size_t s_len)
+{
+ void *p, *q;
+ size_t n;
+ unsigned int c;
- if (!first && little >= littleend) {
- return big;
- }
- if (bigend - big < littleend - little) {
- return NULL;
- }
- bigend -= littleend - little++;
- while (big <= bigend) {
- if (*big++ != first) {
- continue;
- }
- for (x=big,s=little; s < littleend; /**/ ) {
- if (*s++ != *x++) {
- s--;
- break;
- }
- }
- if (s >= littleend) {
- return big-1;
- }
+ if (s_len > buf_len || s_len == 0 || buf_len == 0) return NULL;
+ p = (void *) buf;
+ c = *((unsigned char *) s);
+ for (n = buf_len - s_len + 1; n > 0; n--, p++) {
+ q = memchr(p,c,n);
+ if (!q) return NULL;
+ n -= (q-p);
+ p = q;
+ if (memcmp(p,s,s_len) == 0) return p;
+ }
+ return NULL;
+}
+
+void multipart_buffer_flush(multipart_buffer *self)
+{
+ /* shifts all unread data in the buffer to the beginning of the buffer */
+
+ char *ptr = self->buffer;
+ while ( ptr < self->buffer + self->buffer_len) {
+ *ptr = *(ptr + self->buffer_off);
+ ++ptr;
}
- return NULL;
-}
+
+ self->buffer_off = 0;
+}
+
/*
* This fills up our internal buffer in such a way that the
- * boundary is never split between reads
+ * boundary is never split between reads. Returns number of bytes read,
+ * -1 on dropped connection, 0 if EOF (same as ap_get_client_block) OR
+ * IF bytes == 0 !
*/
-void multipart_buffer_fill(multipart_buffer *self, long bytes)
+
+int multipart_buffer_fill(multipart_buffer *self, long bytes)
{
- int len_read, length, bytes_to_read;
- int buffer_len = self->buffer_len;
+
+ int len_read=0, length, bytes_to_read, total=0;
if (!self->length) {
return;
}
- length = (bytes - buffer_len) + self->boundary_length + 2;
- if (self->length < length) {
- length = self->length;
+
+ bytes_to_read = min( (BUFSIZE) - self->buffer_len,
+ min( bytes, self->length) );
+
+ if ( bytes_to_read + self->buffer_off + self->buffer_len > BUFSIZE )
+ { /* overflow condition - need to flush stale data */
+ multipart_buffer_flush(self);
}
- bytes_to_read = length;
+
+#if DEBUG == 1
+ ap_log_rerror(MPB_ERROR,
+ "[libapreq]: BUFFER_FILL: SIZE=%d, OFF=%d, LENGTH=%d",
+ bytes_to_read, self->buffer_off,
+ self->buffer_len);
+#endif /* DEBUG */
ap_hard_timeout("multipart_buffer_fill", self->r);
+
+ total = bytes_to_read;
+
while (bytes_to_read > 0) {
- /*XXX: we can optimize this loop*/
- char *buff = (char *)ap_pcalloc(self->subp,
- sizeof(char) * bytes_to_read + 1);
- len_read = ap_get_client_block(self->r, buff, bytes_to_read);
+
+ char *buff = self->buffer + self->buffer_len + self->buffer_off;
+ len_read = ap_get_client_block(self->r,
+ buff,
+ bytes_to_read);
if (len_read < 0) {
ap_log_rerror(MPB_ERROR,
"[libapreq] client dropped connection during read");
self->length = 0;
- self->buffer = NULL;
self->buffer_len = 0;
- return;
+ return len_read;
+ }
+
+ if (len_read == 0) { /* premature EOF */
+ break;
}
- self->buffer = self->buffer ?
- my_join(self->r->pool,
- self->buffer, self->buffer_len,
- buff, len_read) :
- ap_pstrndup(self->r->pool, buff, len_read);
-
- self->total += len_read;
self->buffer_len += len_read;
- self->length -= len_read;
- bytes_to_read -= len_read;
+ bytes_to_read -= len_read;
ap_reset_timeout(self->r);
}
+
+ self->total += total;
+ self->length -= total;
+
ap_kill_timeout(self->r);
- ap_clear_pool(self->subp);
+
+ return total;
}
char *multipart_buffer_read(multipart_buffer *self, long bytes, int *blen)
{
int start = -1;
- char *retval, *str;
-
- /* default number of bytes to read */
+ char *mem, *retval;
+
+ /* default number of bytes to read - max */
if (!bytes) {
- bytes = FILLUNIT;
+ bytes = FILLUNIT;
}
/*
* Fill up our internal buffer in such a way that the boundary
* is never split between reads.
*/
- multipart_buffer_fill(self, bytes);
- /* Find the boundary in the buffer (it may not be there). */
- if (self->buffer) {
- if ((str = my_ninstr(self->buffer,
- self->buffer+self->buffer_len,
- self->boundary,
- self->boundary+self->boundary_length)))
- {
- start = str - self->buffer;
+ while ( self->buffer_len < self->boundary_length + 2 ) {
+ if ( multipart_buffer_fill(self, bytes) <= 0 ) {
+ self->length = 0; /* prevent further read attempts */
+ return NULL;
}
- }
+ }
+
+
+#if DEBUG == 1
+ ap_log_rerror(MPB_ERROR,
+ "[libapreq]: BUFFER_READ: SIZE=%d, OFF=%d, LENGTH=%d\n",
+ self->length, self->buffer_off,
+ self->buffer_len, self->buffer + self->buffer_off);
+#endif /* DEBUG */
+
+
+
+ if (self->buffer_len == 0) { return NULL; }
+
+ /* Find the boundary in the buffer (it may not be there). */
+
+ if (mem = my_memmem( self->buffer + self->buffer_off, self->buffer_len,
+ self->boundary, self->boundary_length ) )
+ {
+ start = mem - ( self->buffer + self->buffer_off );
+ }
/* protect against malformed multipart POST operations */
+
if (!(start >= 0 || self->length > 0)) {
ap_log_rerror(MPB_ERROR,
"[libapreq] malformed upload: start=%d, self->length=%d",
@@ -195,109 +241,166 @@
* and return NULL. The +2 here is a fiendish plot to
* remove the CR/LF pair at the end of the boundary.
*/
+
if (start == 0) {
- /* clear us out completely if we've hit the last boundary. */
- if (strEQ(self->buffer, self->boundary_end)) {
- self->buffer = NULL;
- self->buffer_len = 0;
+ if ( memcmp(self->buffer + self->buffer_off,
+ self->boundary_end, self->boundary_length+2) == 0 ) {
self->length = 0;
+ self->buffer_len = 0;
+ return NULL;
+ } else {
+ self->buffer_off += (self->boundary_length + 2);
+ self->buffer_len -= (self->boundary_length + 2);
return NULL;
}
-
- /* otherwise just remove the boundary. */
- self->buffer += (self->boundary_length + 2);
- self->buffer_len -= (self->boundary_length + 2);
- return NULL;
}
- if (start > 0) { /* read up to the boundary */
- *blen = start > bytes ? bytes : start;
- }
- else { /* read the requested number of bytes */
- /*
+ /* read the requested number of bytes, and
* leave enough bytes in the buffer to allow us to read
* the boundary. Thanks to Kevin Hendrick for finding
* this one.
*/
- *blen = bytes - (self->boundary_length + 1);
+
+ *blen = (start > 0) ? start :
+ self->buffer_len - (self->boundary_length + 1);
+
+ *blen = min(*blen, bytes);
+
+ retval = self->buffer + self->buffer_off;
+ self->buffer_len -= *blen;
+
+ if (!self->buffer_len) {
+ self->buffer_off = 0; /* no data left, so we reset buffer */
+ } else {
+ self->buffer_off += *blen;
}
-
- retval = ap_pstrndup(self->r->pool, self->buffer, *blen);
-
- self->buffer += *blen;
- self->buffer_len -= *blen;
/* If we hit the boundary, remove the CRLF from the end. */
+
if (start > 0) {
*blen -= 2;
retval[*blen] = '\0';
}
- return retval;
+ return retval; /* retval is volatile, and must be copied immediately. */
}
table *multipart_buffer_headers(multipart_buffer *self)
{
- int end=0, ok=0, bad=0;
+ int end=-1, length=0, bytes=FILLUNIT;
table *tab;
char *header;
+ char *entry;
+
+ /* This loop will ultimately allocate roughly T * (N+1) / 2
+ * bytes of memory - here T is the total size of input data to be read,
+ * and N is the number of loops required to read it all in, i.e.
+ * N = T / bytes. As this is a header read, there's no good reason
+ * why T is more than 1KB in size, and hence this loop should
+ * only redo itself at most once.
+ */
+
+ do {
+
+ char *mem;
+
+ multipart_buffer_fill(self, bytes);
+
+ if (mem = my_memmem( self->buffer + self->buffer_off, self->buffer_len,
+ CRLF_CRLF, 4))
+ {
+ /* have final header 'fragment' in the buffer */
+
+ end = mem - ( self->buffer + self->buffer_off );
+
+ header = my_join( self->subp,
+ header, length,
+ self->buffer + self->buffer_off, end+2 );
+
+ length += end+2;
+ self->buffer_off += end+4;
+ self->buffer_len -= end+4;
+
+ } else if (self->buffer_len > 3) {
+ /* fetch entire header fragment */
+
+ header = my_join( self->subp,
+ header, length,
+ self->buffer + self->buffer_off, self->buffer_len-3);
+
+ length += self->buffer_len - 3;
+ self->buffer_off += self->buffer_len - 3;
+ self->buffer_len = 3;
+
+ } else if (self->length <= 0) {
+ /* bad header read, check out early */
+
+ ap_log_rerror( MPB_ERROR, "[libapreq]: Bad header read near EOF." );
+ return NULL;
- do {
- char *str;
- multipart_buffer_fill(self, FILLUNIT);
- if ((str = strstr(self->buffer, CRLF_CRLF))) {
- ++ok;
- end = str - self->buffer;
- }
- if (self->buffer == NULL || *self->buffer == '\0') {
- ++ok;
- }
- if (!ok && self->length <= 0) {
- ++bad;
}
- } while (!ok && !bad);
- if (bad) {
- return NULL;
- }
+ } while (end < 0);
+
- header = ap_pstrndup(self->r->pool, self->buffer, end+2);
- /*XXX: need to merge continuation lines here?*/
+#if DEBUG == 1
+ ap_log_rerror(MPB_ERROR,
+ "[libapreq]: DONE READING HEADER: OFF=%d, LENGTH=%d\n%s",
+ self->buffer_off,
+ self->buffer_len, self->buffer + self->buffer_off);
+#endif /* DEBUG */
tab = ap_make_table(self->r->pool, 10);
- self->buffer += end+4;
- self->buffer_len -= end+4;
- {
- char *entry;
- while ((entry = ap_getword_nc(self->r->pool, &header, '\r')) && *header) {
- char *key;
- key = ap_getword_nc(self->r->pool, &entry, ':');
- while (ap_isspace(*entry)) {
- ++entry;
- }
- if (*header == '\n') {
- ++header;
- }
- ap_table_add(tab, key, entry);
+ while ((entry = ap_getword_nc(self->subp, &header, '\r')) && *header) {
+ char *key;
+ key = ap_getword_nc(self->subp, &entry, ':');
+ while (ap_isspace(*entry)) {
+ ++entry;
}
+ if (*header == '\n') {
+ ++header;
+ }
+ ap_table_add(tab, key, entry); /* copies key & entry strings to tab */
}
+ ap_clear_pool(self->subp); /* OK to clean up here */
+
return tab;
}
char *multipart_buffer_read_body(multipart_buffer *self)
{
+ /* This function is NOT used for file uploads!
+ See multipart_buffer_read instead. */
+
char *data, *retval=NULL;
- int blen = 0, old_len = 0;
+ int blen = 0, old_len = 0, bytes = FILLUNIT;
- while ((data = multipart_buffer_read(self, 0, &blen))) {
+ /* This loop will ultimately allocate roughly T * (N+1) / 2
+ * bytes of memory - here T is the total size of input data to be read,
+ * and N is the number of loops required to read it all in, i.e.
+ * N = T / bytes. Since we have no way of knowing T in advance (all we
+ * know is that T <= self->length), we'll just hope that T is at most
+ * a few KB, in which case N = 1 or 2. Since this memory is cleaned up
+ * before we call this function again, this should be fine. If you're
+ * going to allow users to POST large amounts of data, either make them
+ * upload it through a file or use enctype=x-www-form-urlencoded in your
+ * form. If you're still having memory troubles here, do a client-side
+ * check on the size of the form's values before submitting the data to
+ * the server and/or increase FILLUNIT and BUFSIZE in multipart_buffer.h
+ * as appropriate. Don't forget about your POST_MAX settings!
+ */
+
+ while ((data = multipart_buffer_read(self, bytes, &blen))) {
retval = retval ?
- my_join(self->r->pool, retval, old_len, data, blen) :
- ap_pstrndup(self->r->pool, data, blen);
- old_len = blen;
+ my_join(self->subp, retval, old_len, data, blen) :
+ ap_pstrndup(self->subp, data, blen);
+ old_len += blen;
}
+ /* can't clear subp here w/o destroying retval */
+
return retval;
}
@@ -307,14 +410,14 @@
multipart_buffer *self = (multipart_buffer *)
ap_pcalloc(r->pool, sizeof(multipart_buffer));
+ self->subp = ap_make_sub_pool(r->pool);
self->r = r;
self->length = length;
self->boundary = ap_pstrcat(r->pool, "--", boundary, NULL);
self->boundary_length = strlen(self->boundary);
self->boundary_end = ap_pstrcat(r->pool, self->boundary, "--", NULL);
- self->buffer = NULL;
- self->buffer_len = 0;
- self->subp = ap_make_sub_pool(self->r->pool);
+ self->buffer_off = 0;
+ self->buffer_len = 0;
/* Read the preamble and the topmost (boundary) line plus the CRLF. */
(void)multipart_buffer_read(self, 0, &blen);
@@ -325,3 +428,5 @@
return self;
}
+
+
diff -ur libapreq-0.31/c/multipart_buffer.h libapreq/c/multipart_buffer.h
--- libapreq-0.31/c/multipart_buffer.h Sat May 1 02:44:28 1999
+++ libapreq/c/multipart_buffer.h Sat Jul 1 00:29:29 2000
@@ -1,24 +1,30 @@
-#include "apache_request.h"
+#include "apache_request.h" /* JS: 6/30/2000 - more comments and fixes */
+
+#define BUFSIZE (1024 * 8) /* >> maximum possible boundary_length */
+#define FILLUNIT (1024 * 4) /* < BUFSIZE (default byte read) */
typedef struct {
request_rec *r;
pool *subp;
long length;
- long total;
- long boundary_length;
- char *boundary;
+ long total;
+ long boundary_length; /* ~ 40 ; see BUFSIZE above */
+ char *boundary;
char *boundary_end;
- char *buffer;
- long buffer_len;
+ char buffer[BUFSIZE];
+ long buffer_len; /* size of new data in buff[BUFSIZE] */
+ long buffer_off; /* NEW: starting point of new data in buff[BUFSIZE] */
} multipart_buffer;
#define multipart_buffer_eof(self) \
-(((self->buffer == NULL) || (*self->buffer == '\0')) && (self->length <= 0))
-
+(((self->buffer_len == 0 ) || (*(self->buffer + self->buffer_off) == '\0')) \
+&& (self->length <= 0))
char *multipart_buffer_read_body(multipart_buffer *self);
table *multipart_buffer_headers(multipart_buffer *self);
-void multipart_buffer_fill(multipart_buffer *self, long bytes);
+void multipart_buffer_flush(multipart_buffer *self); /* new */
+int multipart_buffer_fill(multipart_buffer *self, long bytes);
char *multipart_buffer_read(multipart_buffer *self, long bytes, int *blen);
multipart_buffer *multipart_buffer_new(char *boundary, long length, request_rec *r);
-
#define MPB_ERROR APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, self->r
+
+