Hi Justin.
A good place to test the de-chunk logic is via the proxy module.

via the proxy, go to sites like apple.com/macosx
and see if the dechunking breaks them.

Regards
Ian
Justin Erenkrantz wrote:

> I started out trying to get flood to support chunked encoding (as
> recent changes to httpd have started to let chunked encoding work).
> Then, I decided to look at how httpd-2.0 handles dechunking (as it 
> needs to be able to handle chunked input).
> 
> The current dechunk code can't handle trailers properly (does anybody
> actually use them?) - according to my reading of RFC 2616, you can 
> have multiple trailers (Roy?).  So, I believe that part is broken.
> 
> While I was examining the code, I ran into all of gstein's comments 
> in dechunk_filter.  And, then in ap_http_filter.  So, this is an 
> attempt to try to address some of the issues in http_protocol.c.
> 
> This also has some patches and new functions to apr-util's buckets 
> that seem like they are needed.  Also, why is the socket (and pipe)
> code creating 0-length immortal buckets?  I don't understand why 
> and it serves to complicate things.
> 
> The only thing I can say with this is that it compiles and still
> serves requests.  But, I haven't put it through any extensive 
> testing (i.e. any POSTs).  I'd like to go to bed now, so I'm 
> sending this out to the list so that people can review this and 
> give feedback/flames.  I'll keep playing with this tomorrow...
> 
> The only question I have right now is whether we need anyone
> to produce the EOS bucket on input.  I'm not terribly sure of 
> that (seems Greg was thinking along these lines too).  I know it 
> is needed in the output, but I wonder where the right place is 
> for the EOS on input.  Thoughts?  -- justin
> 
> Index: include/apr_buckets.h
> ===================================================================
> RCS file: /home/cvs/apr-util/include/apr_buckets.h,v
> retrieving revision 1.118
> diff -u -r1.118 apr_buckets.h
> --- include/apr_buckets.h     2001/09/22 22:36:07     1.118
> +++ include/apr_buckets.h     2001/09/23 10:36:12
> @@ -689,6 +689,17 @@
>                                               apr_off_t *length);
>  
>  /**
> + * create a flat buffer of the elements in a bucket_brigade.
> + * @param b The bucket brigade to create the buffer from
> + * @param c The pointer to the returned character string
> + * @param len The length of the returned character string
> + * @param p The pool to allocate the character string from.
> + * Note: You *must* have enough space allocated to store all of the data.
> + * See apr_brigade_length().
> + */
> +APU_DECLARE(apr_status_t) apr_brigade_to_buffer(apr_bucket_brigade *b, char *c);
> +
> +/**
>   * create an iovec of the elements in a bucket_brigade... return number 
>   * of elements used.  This is useful for writing to a file or to the
>   * network efficiently.
> Index: buckets/apr_brigade.c
> ===================================================================
> RCS file: /home/cvs/apr-util/buckets/apr_brigade.c,v
> retrieving revision 1.25
> diff -u -r1.25 apr_brigade.c
> --- buckets/apr_brigade.c     2001/09/03 22:00:36     1.25
> +++ buckets/apr_brigade.c     2001/09/23 10:36:12
> @@ -221,6 +221,29 @@
>      return APR_SUCCESS;
>  }
>  
> +APU_DECLARE(apr_status_t) apr_brigade_to_buffer(apr_bucket_brigade *b, char *c)
> +{
> +    apr_bucket *e;
> +    char *current;
> +    apr_size_t curlen;
> +    apr_status_t status;
> +
> +    current = c;
> +
> +    APR_BRIGADE_FOREACH(e, b) {
> +
> +        status = apr_bucket_read(e, (const char **)&current, &curlen,
> +                                 APR_BLOCK_READ);
> +        if (status != APR_SUCCESS)
> +            return status;
> +        
> +        current += curlen;
> +    }
> +
> +    return APR_SUCCESS;
> +
> +}
> +
>  APU_DECLARE(apr_status_t) apr_brigade_to_iovec(apr_bucket_brigade *b, 
>                                                 struct iovec *vec, int *nvec)
>  {
> Index: buckets/apr_buckets_pipe.c
> ===================================================================
> RCS file: /home/cvs/apr-util/buckets/apr_buckets_pipe.c,v
> retrieving revision 1.41
> diff -u -r1.41 apr_buckets_pipe.c
> --- buckets/apr_buckets_pipe.c        2001/09/22 22:36:07     1.41
> +++ buckets/apr_buckets_pipe.c        2001/09/23 10:36:13
> @@ -106,15 +106,8 @@
>      }
>      else {
>          free(buf);
> -        a = apr_bucket_immortal_make(a, "", 0);
> -        *str = a->data;
> -        if (rv == APR_EOF) {
> +        if (rv == APR_EOF)
>              apr_file_close(p);
> -            if (block == APR_NONBLOCK_READ) {
> -                /* XXX: this is bogus, should return APR_SUCCESS */
> -                return APR_EOF;
> -            }
> -        }
>      }
>      return APR_SUCCESS;
>  }
> Index: buckets/apr_buckets_socket.c
> ===================================================================
> RCS file: /home/cvs/apr-util/buckets/apr_buckets_socket.c,v
> retrieving revision 1.30
> diff -u -r1.30 apr_buckets_socket.c
> --- buckets/apr_buckets_socket.c      2001/09/22 22:36:07     1.30
> +++ buckets/apr_buckets_socket.c      2001/09/23 10:36:13
> @@ -79,7 +79,7 @@
>      }
>  
>      if (rv != APR_SUCCESS && rv != APR_EOF) {
> -     free(buf);
> +        free(buf);
>          return rv;
>      }
>      /*
> @@ -109,12 +109,6 @@
>      }
>      else {
>          free(buf);
> -        a = apr_bucket_immortal_make(a, "", 0);
> -        *str = a->data;
> -        if (rv == APR_EOF && block == APR_NONBLOCK_READ) {
> -            /* XXX: this is bogus... should return APR_SUCCESS */
> -            return APR_EOF;
> -        }
>      }
>      return APR_SUCCESS;
>  }
> 
> Index: modules/http/http_protocol.c
> ===================================================================
> RCS file: /home/cvs/httpd-2.0/modules/http/http_protocol.c,v
> retrieving revision 1.362
> diff -u -r1.362 http_protocol.c
> --- modules/http/http_protocol.c      2001/09/17 13:12:37     1.362
> +++ modules/http/http_protocol.c      2001/09/23 10:56:30
> @@ -487,6 +487,7 @@
>      enum {
>          WANT_HDR /* must have value zero */,
>          WANT_BODY,
> +        WANT_ENDCHUNK,
>          WANT_TRL
>      } state;
>  };
> @@ -498,9 +499,7 @@
>  {
>      apr_status_t rv;
>      struct dechunk_ctx *ctx = f->ctx;
> -    apr_bucket *b;
> -    const char *buf;
> -    apr_size_t len;
> +    apr_off_t len;
>  
>      if (!ctx) {
>          f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(struct dechunk_ctx));
> @@ -508,43 +507,38 @@
>  
>      do {
>          if (ctx->chunk_size == ctx->bytes_delivered) {
> -            /* Time to read another chunk header or trailer...  ap_http_filter() is 
> -             * the next filter in line and it knows how to return a brigade with 
> -             * one line.
> -             */
> +            /* Time to read another chunk header or trailer...  We only want
> +             * one line - by specifying 0 as the length to read.  */
>              char line[30];
>              
> -            if ((rv = ap_getline(line, sizeof(line), f->r,
> -                                 0 /* readline */)) < 0) {
> +            if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) {
>                  return rv;
>              }
>              switch(ctx->state) {
>              case WANT_HDR:
>                  ctx->chunk_size = get_chunk_size(line);
>                  ctx->bytes_delivered = 0;
> -                if (ctx->chunk_size == 0) {
> +                if (ctx->chunk_size == 0)
>                      ctx->state = WANT_TRL;
> -                }
> -                else {
> +                else
>                      ctx->state = WANT_BODY;
> -                }
>                  break;
> +            case WANT_ENDCHUNK:
> +                /* We don't care.  Next state please. */
> +                ctx->state = WANT_TRL;
> +                break;
>              case WANT_TRL:
> -                /* XXX sanity check end chunk here */
> -                if (strlen(line)) {
> -                    /* bad trailer */
> -                }
> -                if (ctx->chunk_size == 0) { /* we just finished the last chunk? */
> -                    /* ### woah... ap_http_filter() is doing this, too */
> -                    /* append eos bucket and get out */
> -                    b = apr_bucket_eos_create();
> -                    APR_BRIGADE_INSERT_TAIL(bb, b);
> +                /* Keep reading until we get to a blank line. */
> +                /* Should add these headers to r->headers_in? */
> +                if (!strlen(line))
> +                {
> +                    ctx->state = WANT_HDR;
>                      return APR_SUCCESS;
>                  }
> -                ctx->state = WANT_HDR;
> -                break;
> +                break; 
>              default:
> -                ap_assert(ctx->state == WANT_HDR || ctx->state == WANT_TRL);
> +                ap_assert(ctx->state == WANT_HDR || ctx->state == WANT_TRL ||
> +                          ctx->state == WANT_ENDCHUNK);
>              }
>          }
>      } while (ctx->state != WANT_BODY);
> @@ -558,27 +552,11 @@
>              return rv;
>          }
>  
> -        /* Walk through the body, accounting for bytes, and removing an eos
> -         * bucket if ap_http_filter() delivered the entire chunk.
> -         *
> -         * ### this shouldn't be necessary. 1) ap_http_filter shouldn't be
> -         * ### adding EOS buckets. 2) it shouldn't return more bytes than
> -         * ### we requested, therefore the total len can be found with a
> -         * ### simple call to apr_brigade_length(). no further munging
> -         * ### would be needed.
> -         */
> -        b = APR_BRIGADE_FIRST(bb);
> -        while (b != APR_BRIGADE_SENTINEL(bb) && !APR_BUCKET_IS_EOS(b)) {
> -            apr_bucket_read(b, &buf, &len, mode);
> -            AP_DEBUG_ASSERT(len <= ctx->chunk_size - ctx->bytes_delivered);
> -            ctx->bytes_delivered += len;
> -            b = APR_BUCKET_NEXT(b);
> -        }
> -        if (ctx->bytes_delivered == ctx->chunk_size) {
> -            AP_DEBUG_ASSERT(APR_BUCKET_IS_EOS(b));
> -            apr_bucket_delete(b);
> -            ctx->state = WANT_TRL;
> -        }
> +        /* Have we read a complete chunk?  */
> +        apr_brigade_length(bb, 1, &len);
> +        ctx->bytes_delivered += len;
> +        if (ctx->bytes_delivered == ctx->chunk_size)
> +            ctx->state = WANT_ENDCHUNK;
>      }
>  
>      return APR_SUCCESS;
> @@ -588,7 +566,8 @@
>      apr_bucket_brigade *b;
>  } http_ctx_t;
>  
> -apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t 
>mode, apr_off_t *readbytes)
> +apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b, 
> +                            ap_input_mode_t mode, apr_off_t *readbytes)
>  {
>      apr_bucket *e;
>      char *buff;
> @@ -641,7 +620,8 @@
>      }
>  
>      if (APR_BRIGADE_EMPTY(ctx->b)) {
> -        if ((rv = ap_get_brigade(f->next, ctx->b, mode, readbytes)) != APR_SUCCESS) 
>{
> +        if ((rv = ap_get_brigade(f->next, ctx->b, mode, 
> +                                 readbytes)) != APR_SUCCESS) {
>              return rv;
>          }
>      }
> @@ -655,12 +635,16 @@
>      if (*readbytes == -1) {
>          apr_bucket *e;
>          apr_off_t total;
> +        const char *str;
> +        apr_size_t len;
>          APR_BRIGADE_FOREACH(e, ctx->b) {
> -            const char *str;
> -            apr_size_t len;
> +            /* We don't care about these values.  We just want to force the
> +             * lower level to convert it.  This seems hackish. */
>              apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
>          }
>          APR_BRIGADE_CONCAT(b, ctx->b);
> +
> +        /* Force a recompute of the length */
>          apr_brigade_length(b, 1, &total);
>          *readbytes = total;
>          e = apr_bucket_eos_create();
> @@ -670,67 +654,40 @@
>      /* readbytes == 0 is "read a single line". otherwise, read a block. */
>      if (*readbytes) {
>          apr_off_t total;
> +        apr_bucket *e;
> +        apr_bucket_brigade *newbb;
>  
> -        /* ### the code below, which moves bytes from one brigade to the
> -           ### other is probably bogus. presuming the next filter down was
> -           ### working properly, it should not have returned more than
> -           ### READBYTES bytes, and we wouldn't have to do any work.
> -        */
> -
> -        APR_BRIGADE_NORMALIZE(ctx->b);
> -        if (APR_BRIGADE_EMPTY(ctx->b)) {
> -            if ((rv = ap_get_brigade(f->next, ctx->b, mode, readbytes)) != 
>APR_SUCCESS) {
> -                return rv;
> -            }
> -        }
> -            
> +        newbb = NULL;
> +
>          apr_brigade_partition(ctx->b, *readbytes, &e);
> -        APR_BRIGADE_CONCAT(b, ctx->b);
> +        /* Must do split before CONCAT */
>          if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
> -            apr_bucket_brigade *temp;
> +            newbb = apr_brigade_split(ctx->b, e);
> +        }
> +        APR_BRIGADE_CONCAT(b, ctx->b);
>  
> -            temp = apr_brigade_split(b, e);
> +        /* FIXME: Is this really needed?  Due to pointer use in sentinels,
> +         * I think so. */
> +        if (newbb)
> +            APR_BRIGADE_CONCAT(ctx->b, newbb);
>  
> -            /* ### darn. gotta ensure the split brigade is in the proper pool.
> -               ### this is a band-aid solution; we shouldn't even be doing
> -               ### all of this brigade munging (per the comment above).
> -               ### until then, this will get the right lifetimes. */
> -            APR_BRIGADE_CONCAT(ctx->b, temp);
> -        }
> -        else {
> -            if (!APR_BRIGADE_EMPTY(ctx->b)) {
> -                ctx->b = NULL; /*XXX*/
> -            }
> -        }
> +        /* FIXME: Are we assuming that b is empty when we enter? */
>          apr_brigade_length(b, 1, &total);
> -        *readbytes -= total;
> +        *readbytes = total;
>  
> -        /* ### this is a hack. it is saying, "if we have read everything
> -           ### that was requested, then we are at the end of the request."
> -           ### it presumes that the next filter up will *only* call us
> -           ### with readbytes set to the Content-Length of the request.
> -           ### that may not always be true, and this code is *definitely*
> -           ### too presumptive of the caller's intent. the point is: this
> -           ### filter is not the guy that is parsing the headers or the
> -           ### chunks to determine where the end of the request is, so we
> -           ### shouldn't be monkeying with EOS buckets.
> -        */
> -        if (*readbytes == 0) {
> -            apr_bucket *eos = apr_bucket_eos_create();
> -                
> -            APR_BRIGADE_INSERT_TAIL(b, eos);
> -        }
>          return APR_SUCCESS;
>      }
>  
>      /* we are reading a single line, e.g. the HTTP headers */
>      while (!APR_BRIGADE_EMPTY(ctx->b)) {
>          e = APR_BRIGADE_FIRST(ctx->b);
> -        if ((rv = apr_bucket_read(e, (const char **)&buff, &len, mode)) != 
>APR_SUCCESS) {
> +        if ((rv = apr_bucket_read(e, (const char **)&buff, &len, 
> +                                  mode)) != APR_SUCCESS) {
>              return rv;
>          }
>  
>          pos = memchr(buff, APR_ASCII_LF, len);
> +        /* We found a match. */
>          if (pos != NULL) {
>              apr_bucket_split(e, pos - buff + 1);
>              APR_BUCKET_REMOVE(e);
> @@ -740,6 +697,7 @@
>          APR_BUCKET_REMOVE(e);
>          APR_BRIGADE_INSERT_TAIL(b, e);
>      }
> +
>      return APR_SUCCESS;
>  }
>  
> @@ -1517,14 +1475,12 @@
>   */
>  AP_DECLARE(long) ap_get_client_block(request_rec *r, char *buffer, apr_size_t 
>bufsiz)
>  {
> -    apr_size_t len_read, total;
> -    apr_status_t rv;
> -    apr_bucket *b, *old;
> -    const char *tempbuf;
> +    apr_off_t len;
> +    apr_bucket *b;
>      core_request_config *req_cfg =
>       (core_request_config *)ap_get_module_config(r->request_config,
>                                                      &core_module);
> -    apr_bucket_brigade *bb = req_cfg->bb;
> +    apr_bucket_brigade *bb = req_cfg->bb, *newbb;
>  
>      do {
>          if (APR_BRIGADE_EMPTY(bb)) {
> @@ -1546,41 +1502,32 @@
>          return 0;
>      }
>  
> -    /* ### it would be nice to replace the code below with "consume N bytes
> -       ### from this brigade, placing them into that buffer." there are
> -       ### other places where we do the same...
> -       ###
> -       ### alternatively, we could partition the brigade, then call a
> -       ### function which serializes a given brigade into a buffer. that
> -       ### semantic is used elsewhere, too...
> -    */
> -
> -    total = 0;
> -    while (total < bufsiz
> -           && b != APR_BRIGADE_SENTINEL(bb)
> -           && !APR_BUCKET_IS_EOS(b)) {
> -
> -        if ((rv = apr_bucket_read(b, &tempbuf, &len_read, APR_BLOCK_READ)) != 
>APR_SUCCESS) {
> -            return -1;
> -        }
> -        if (total + len_read > bufsiz) {
> -            apr_bucket_split(b, bufsiz - total);
> -            len_read = bufsiz - total;
> -        }
> -        memcpy(buffer, tempbuf, len_read);
> -        buffer += len_read;
> -        total += len_read;
> -        /* XXX the next two fields shouldn't be mucked with here, as they are in 
>terms
> -         * of bytes in the unfiltered body; gotta see if anybody else actually uses 
> -         * these
> -         */
> -        r->read_length += len_read;      /* XXX yank me? */
> -        old = b;
> -        b = APR_BUCKET_NEXT(b);
> -        apr_bucket_delete(old);
> -    }
> +    /* 1) Determine length to see if we may overrun. 
> +     * 2) Partition the brigade at the appropriate point.
> +     * 3) Split the brigade at the new partition.
> +     * 4) Read the old brigade into the buffer.
> +     * 5) Destroy the old brigade.
> +     * 6) Point the context at the new brigade.
> +     */
> +    apr_brigade_length(bb, 1, &len);
> +
> +    if (bufsiz < len)
> +        len = bufsiz;
> +
> +    if (apr_brigade_partition(bb, len, &b) != APR_SUCCESS)
> +        return -1;
> +
> +    newbb = apr_brigade_split(bb, b);
> +
> +    if (apr_brigade_to_buffer(bb, buffer) != APR_SUCCESS)
> +        return -1;
> +
> +    if (apr_brigade_destroy(bb) != APR_SUCCESS)
> +        return -1;
> +
> +    req_cfg->bb = newbb;
>  
> -    return total;
> +    return bufsiz;
>  }
>  
>  /* In HTTP/1.1, any method can have a body.  However, most GET handlers
> 
> 


Reply via email to