Hi all,
Alex asked me to separate the chunked request patch from the ICAP access logging patch, as requested by Amos.

This is the chunked request patch developed by Alex.
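
For anyone who wants to try it, here is a minimal squid.conf sketch of the new
directive (the comments are mine; 64 KB is the patch's default, and zero
disables dechunking so chunked requests are rejected as before):

    # buffer and dechunk chunked HTTP/1.1 request bodies up to 64 KB
    chunked_request_body_max_size 64 KB

    # or reject chunked requests outright ("unsupported request" error)
    #chunked_request_body_max_size 0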

Christos
=== modified file 'src/HttpMsg.cc'
--- src/HttpMsg.cc	2009-02-02 12:50:23 +0000
+++ src/HttpMsg.cc	2009-07-06 20:28:38 +0000
@@ -309,6 +309,14 @@
     return -1;
 }
 
+void
+HttpMsg::setContentLength(int64_t clen)
+{
+    header.delById(HDR_CONTENT_LENGTH); // if any
+    header.putInt64(HDR_CONTENT_LENGTH, clen);
+    content_length = clen;
+}
+
 /* returns true if connection should be "persistent"
  * after processing this message */
 int

=== modified file 'src/HttpMsg.h'
--- src/HttpMsg.h	2009-01-21 03:47:47 +0000
+++ src/HttpMsg.h	2009-07-06 20:28:38 +0000
@@ -58,6 +58,9 @@
     ///< produce a message copy, except for a few connection-specific settings
     virtual HttpMsg *clone() const = 0; ///< \todo rename: not a true copy?
 
+    /// [re]sets Content-Length header and cached value
+    void setContentLength(int64_t clen);
+
 public:
     HttpVersion http_ver;
 
@@ -109,7 +112,8 @@
 };
 
 /* Temporary parsing state; might turn into the replacement parser later on */
-struct _HttpParser {
+class HttpParser {
+public:
     char state;
     const char *buf;
     int bufsiz;
@@ -120,7 +124,6 @@
     int v_start, v_end;
     int v_maj, v_min;
 };
-typedef struct _HttpParser HttpParser;
 
 extern void HttpParserInit(HttpParser *, const char *buf, int len);
 extern int HttpParserParseReqLine(HttpParser *hp);

=== modified file 'src/cf.data.pre'
--- src/cf.data.pre	2009-06-28 11:47:27 +0000
+++ src/cf.data.pre	2009-07-06 20:19:22 +0000
@@ -3265,6 +3265,33 @@
 	be no limit imposed.
 DOC_END
 
+NAME: chunked_request_body_max_size
+COMMENT: (bytes)
+TYPE: b_int64_t
+DEFAULT: 64 KB
+LOC: Config.maxChunkedRequestBodySize
+DOC_START
+       A broken or confused HTTP/1.1 client may send a chunked HTTP
+       request to Squid. Squid does not have full support for that
+       feature yet. To cope with such requests, Squid buffers the
+       entire request and then dechunks the request body to create a
+       plain HTTP/1.0 request with a known content length. The plain
+       request is then used by the rest of Squid as usual.
+
+       The option value specifies the maximum size of the buffer used
+       to hold the request before the conversion. If the chunked
+       request size exceeds the specified limit, the conversion
+       fails, and the client receives an "unsupported request" error,
+       as if dechunking were disabled.
+
+       Dechunking is enabled by default. To disable conversion of
+       chunked requests, set the maximum to zero.
+
+       The request dechunking feature, and this option in particular,
+       are a temporary hack. When chunked requests and responses are
+       fully supported, there will be no need to buffer a chunked request.
+DOC_END
+
 NAME: broken_posts
 IFDEF: HTTP_VIOLATIONS
 TYPE: acl_access

=== modified file 'src/client_side.cc'
--- src/client_side.cc	2009-06-14 12:18:00 +0000
+++ src/client_side.cc	2009-07-07 20:39:04 +0000
@@ -103,6 +103,7 @@
 #include "ClientRequestContext.h"
 #include "MemBuf.h"
 #include "SquidTime.h"
+#include "ChunkedCodingParser.h"
 
 #if LINGERING_CLOSE
 #define comm_close comm_lingering_close
@@ -1873,6 +1874,18 @@
     }
 }
 
+// Temporary hack helper: determine whether the request is chunked, expensive
+static bool
+isChunkedRequest(const HttpParser *hp) {
+    HttpRequest request;
+    if (!request.parseHeader(HttpParserHdrBuf(hp), HttpParserHdrSz(hp)))
+       return false;
+
+    return request.header.has(HDR_TRANSFER_ENCODING) &&
+       request.header.hasListMember(HDR_TRANSFER_ENCODING, "chunked", ',');
+}
+
+
 /**
  *  parseHttpRequest()
  *
@@ -1885,7 +1898,6 @@
 static ClientSocketContext *
 parseHttpRequest(ConnStateData *conn, HttpParser *hp, HttpRequestMethod * method_p, HttpVersion *http_ver)
 {
-    char *url = NULL;
     char *req_hdr = NULL;
     char *end;
     size_t req_sz;
@@ -1952,17 +1964,6 @@
         return parseHttpRequestAbort(conn, "error:unsupported-request-method");
     }
 
-    /* set url */
-    /*
-     * XXX this should eventually not use a malloc'ed buffer; the transformation code
-     * below needs to be modified to not expect a mutable nul-terminated string.
-     */
-    url = (char *)xmalloc(hp->u_end - hp->u_start + 16);
-
-    memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1);
-
-    url[hp->u_end - hp->u_start + 1] = '\0';
-
     /*
      * Process headers after request line
      * TODO: Use httpRequestParse here.
@@ -1982,7 +1983,6 @@
      */
     if ( squid_strnstr(req_hdr, "\r\r\n", req_sz) ) {
         debugs(33, 1, "WARNING: suspicious HTTP request contains double CR");
-        xfree(url);
         return parseHttpRequestAbort(conn, "error:double-CR");
     }
 
@@ -1990,6 +1990,35 @@
            (int) HttpParserRequestLen(hp) << ", req_line_sz = " <<
            HttpParserReqSz(hp));
 
+    // Temporary hack: We might receive a chunked body from a broken HTTP/1.1
+    // client that sends chunked requests to HTTP/1.0 Squid. If the request
+    // might have a chunked body, parse the headers early to look for the
+    // "Transfer-Encoding: chunked" header. If we find it, wait until the
+    // entire body is available so that we can set the content length and
+    // forward the request without chunks. The primary reason for this is
+    // to avoid forwarding a chunked request because the server side lacks
+    // logic to determine when it is valid to do so.
+    // FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS below will replace this hack.
+    if (hp->v_min == 1 && hp->v_maj == 1 && // broken client, may send chunks
+        Config.maxChunkedRequestBodySize > 0 && // configured to dechunk
+        (*method_p == METHOD_PUT || *method_p == METHOD_POST)) {
+
+        // check only once per request because isChunkedRequest is expensive
+        if (conn->in.dechunkingState == ConnStateData::chunkUnknown) {
+            if (isChunkedRequest(hp))
+                conn->startDechunkingRequest(hp);
+            else
+                conn->in.dechunkingState = ConnStateData::chunkNone;
+        }
+
+        if (conn->in.dechunkingState == ConnStateData::chunkParsing) {
+            if (conn->parseRequestChunks(hp)) // parses newly read chunks
+                return NULL; // wait for more data
+            debugs(33, 5, HERE << "Got complete chunked request or err.");
+            assert(conn->in.dechunkingState != ConnStateData::chunkParsing);
+        }
+    }
+
     /* Ok, all headers are received */
     http = new ClientHttpRequest(conn);
 
@@ -2006,6 +2035,17 @@
 
     debugs(33, 5, "parseHttpRequest: Request Header is\n" <<(hp->buf) + hp->hdr_start);
 
+    /* set url */
+    /*
+     * XXX this should eventually not use a malloc'ed buffer; the transformation code
+     * below needs to be modified to not expect a mutable nul-terminated string.
+     */
+    char *url = (char *)xmalloc(hp->u_end - hp->u_start + 16);
+
+    memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1);
+
+    url[hp->u_end - hp->u_start + 1] = '\0';
+
 #if THIS_VIOLATES_HTTP_SPECS_ON_URL_TRANSFORMATION
 
     if ((t = strchr(url, '#')))	/* remove HTML anchors */
@@ -2157,6 +2197,11 @@
 int
 connKeepReadingIncompleteRequest(ConnStateData * conn)
 {
+    // when we read chunked requests, the entire body is buffered
+    // XXX: this check ignores header size and its limits.
+    if (conn->in.dechunkingState == ConnStateData::chunkParsing)
+        return conn->in.notYetUsed < Config.maxChunkedRequestBodySize;
+
     return conn->in.notYetUsed >= Config.maxRequestHeaderSize ? 0 : 1;
 }
 
@@ -2166,8 +2211,13 @@
     ClientSocketContext *context = parseHttpRequestAbort(conn, "error:request-too-large");
     clientStreamNode *node = context->getClientReplyContext();
     assert(!connKeepReadingIncompleteRequest(conn));
-    debugs(33, 1, "Request header is too large (" << conn->in.notYetUsed << " bytes)");
-    debugs(33, 1, "Config 'request_header_max_size'= " << Config.maxRequestHeaderSize << " bytes.");
+    if (conn->in.dechunkingState == ConnStateData::chunkParsing) {
+        debugs(33, 1, "Chunked request is too large (" << conn->in.notYetUsed << " bytes)");
+        debugs(33, 1, "Config 'chunked_request_body_max_size'= " << Config.maxChunkedRequestBodySize << " bytes.");
+    } else {
+        debugs(33, 1, "Request header is too large (" << conn->in.notYetUsed << " bytes)");
+        debugs(33, 1, "Config 'request_header_max_size'= " << Config.maxRequestHeaderSize << " bytes.");
+    }
     clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
     assert (repContext);
     repContext->setReplyToError(ERR_TOO_BIG,
@@ -2213,6 +2263,9 @@
     ClientHttpRequest *http = context->http;
     HttpRequest *request = NULL;
     bool notedUseOfBuffer = false;
+    bool tePresent = false;
+    bool deChunked = false;
+    bool unsupportedTe = false;
 
     /* We have an initial client stream in place should it be needed */
     /* setup our private context */
@@ -2312,8 +2365,17 @@
     request->my_addr = conn->me;
     request->http_ver = http_ver;
 
-    if (!urlCheckRequest(request) ||
-            request->header.has(HDR_TRANSFER_ENCODING)) {
+    tePresent = request->header.has(HDR_TRANSFER_ENCODING);
+    deChunked = conn->in.dechunkingState == ConnStateData::chunkReady;
+    if (deChunked) {
+        assert(tePresent);
+        request->setContentLength(conn->in.dechunked.contentSize());
+        request->header.delById(HDR_TRANSFER_ENCODING);
+        conn->finishDechunkingRequest(hp);
+    }
+
+    unsupportedTe = tePresent && !deChunked;
+    if (!urlCheckRequest(request) || unsupportedTe) {
         clientStreamNode *node = context->getClientReplyContext();
         clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
         assert (repContext);
@@ -2647,13 +2709,73 @@
 {
     assert(bodyPipe != NULL);
 
-    if (const size_t putSize = bodyPipe->putMoreData(in.buf, in.notYetUsed))
+    size_t putSize = 0;
+
+#if FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS
+    // The code below works, in principle, but we cannot do dechunking
+    // on-the-fly because that would mean sending chunked requests to
+    // the next hop. Squid lacks logic to determine which servers can
+    // receive chunked requests.
+    // The error generation code probably needs more work.
+    if (in.bodyParser) { // chunked body
+        debugs(33,5, HERE << "handling chunked request body for FD " << fd);
+        bool malformedChunks = false;
+
+        MemBuf raw; // ChunkedCodingParser only works with MemBufs
+        raw.init(in.notYetUsed, in.notYetUsed);
+        raw.append(in.buf, in.notYetUsed);
+        try { // the parser will throw on errors
+            const mb_size_t wasContentSize = raw.contentSize();
+            BodyPipeCheckout bpc(*bodyPipe);
+            const bool parsed = in.bodyParser->parse(&raw, &bpc.buf);
+            bpc.checkIn();
+            putSize = wasContentSize - raw.contentSize();
+
+            if (parsed) {
+                stopProducingFor(bodyPipe, true); // this makes bodySize known
+            } else {
+                // parser needy state must imply body pipe needy state
+                if (in.bodyParser->needsMoreData() &&
+                    !bodyPipe->mayNeedMoreData())
+                    malformedChunks = true;
+                // XXX: if bodyParser->needsMoreSpace, how can we guarantee it?
+            }
+        } catch (...) { // XXX: be more specific
+            malformedChunks = true;
+        }
+
+        if (malformedChunks) {
+            if (bodyPipe != NULL)
+                stopProducingFor(bodyPipe, false);
+
+            ClientSocketContext::Pointer context = getCurrentContext();
+            if (!context->http->out.offset) {
+                clientStreamNode *node = context->getClientReplyContext();
+                clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
+                assert (repContext);
+                repContext->setReplyToError(ERR_INVALID_REQ, HTTP_BAD_REQUEST,
+                    METHOD_NONE, NULL, &peer.sin_addr,
+                    NULL, NULL, NULL);
+                context->pullData();
+            }
+            flags.readMoreRequests = false;
+            return; // XXX: is that sufficient to generate an error?
+        }
+    } else // identity encoding
+#endif
+    {
+        debugs(33,5, HERE << "handling plain request body for FD " << fd);
+        putSize = bodyPipe->putMoreData(in.buf, in.notYetUsed);
+        if (!bodyPipe->mayNeedMoreData()) {
+            // BodyPipe will clear us automagically when we produced everything
+            bodyPipe = NULL;
+        }
+    }
+
+    if (putSize > 0)
         connNoteUseOfBuffer(this, putSize);
 
-    if (!bodyPipe->mayNeedMoreData()) {
-        // BodyPipe will clear us automagically when we produced everything
-        bodyPipe = NULL;
-
+    if (!bodyPipe) {
         debugs(33,5, HERE << "produced entire request body for FD " << fd);
 
         if (closing()) {
@@ -3434,19 +3556,121 @@
     bodyPipe->enableAutoConsumption();
 }
 
+// initialize dechunking state
+void
+ConnStateData::startDechunkingRequest(HttpParser *hp)
+{
+    debugs(33, 5, HERE << "start dechunking at " << HttpParserRequestLen(hp));
+    assert(in.dechunkingState == chunkUnknown);
+    assert(!in.bodyParser);
+    in.bodyParser = new ChunkedCodingParser;
+    in.chunkedSeen = HttpParserRequestLen(hp); // skip headers when dechunking
+    in.chunked.init();  // TODO: should we have a smaller-than-default limit?
+    in.dechunked.init();
+    in.dechunkingState = chunkParsing;
+}
+
+// put parsed content into input buffer and clean up
+void
+ConnStateData::finishDechunkingRequest(HttpParser *hp)
+{
+    debugs(33, 5, HERE << "finish dechunking; content: " << in.dechunked.contentSize());
+
+    assert(in.dechunkingState == chunkReady);
+    assert(in.bodyParser);
+    delete in.bodyParser;
+    in.bodyParser = NULL;
+
+    const mb_size_t headerSize = HttpParserRequestLen(hp);
+
+    // dechunking cannot make data bigger
+    assert(headerSize + in.dechunked.contentSize() + in.chunked.contentSize()
+        <= static_cast<mb_size_t>(in.notYetUsed));
+    assert(in.notYetUsed <= in.allocatedSize);
+
+    // copy dechunked content
+    char *end = in.buf + headerSize;
+    xmemmove(end, in.dechunked.content(), in.dechunked.contentSize());
+    end += in.dechunked.contentSize();
+
+    // copy post-chunk leftovers, if any (possibly caused by request pipelining)
+    if (in.chunked.contentSize()) {
+        xmemmove(end, in.chunked.content(), in.chunked.contentSize());
+        end += in.chunked.contentSize();
+    }
+
+    in.notYetUsed = end - in.buf;
+
+    in.chunked.clean();
+    in.dechunked.clean();
+    in.dechunkingState = chunkUnknown;
+}
+
+// parse newly read request chunks and buffer them for finishDechunkingRequest
+// returns true iff needs more data
+bool
+ConnStateData::parseRequestChunks(HttpParser *)
+{
+    debugs(33,5, HERE << "parsing chunked request body at " <<
+        in.chunkedSeen << " < " << in.notYetUsed);
+    assert(in.bodyParser);
+    assert(in.dechunkingState == chunkParsing);
+
+    assert(in.chunkedSeen <= in.notYetUsed);
+    const mb_size_t fresh = in.notYetUsed - in.chunkedSeen;
+
+    // be safe: count some chunked coding metadata towards the total body size
+    if (fresh + in.dechunked.contentSize() > Config.maxChunkedRequestBodySize) {
+        debugs(33,3, HERE << "chunked body (" << fresh << " + " <<
+            in.dechunked.contentSize() << ") may exceed " <<
+            "chunked_request_body_max_size=" <<
+            Config.maxChunkedRequestBodySize);
+        in.dechunkingState = chunkError;
+        return false;
+    }
+
+    if (fresh > in.chunked.potentialSpaceSize()) {
+        // should not happen if Config.maxChunkedRequestBodySize is reasonable
+        debugs(33,1, HERE << "chunked_request_body_max_size exceeds chunked buffer " <<
+            "size: " << fresh << " + " << in.chunked.contentSize() << " > " <<
+            in.chunked.potentialSpaceSize() << " with " <<
+            "chunked_request_body_max_size=" <<
+            Config.maxChunkedRequestBodySize);
+        in.dechunkingState = chunkError;
+        return false;
+    }
+    in.chunked.append(in.buf + in.chunkedSeen, fresh);
+    in.chunkedSeen += fresh;
+
+    try { // the parser will throw on errors
+        if (in.bodyParser->parse(&in.chunked, &in.dechunked))
+            in.dechunkingState = chunkReady; // successfully parsed all chunks
+        else
+            return true; // need more, keep the same state
+    } catch (...) {
+        debugs(33,3, HERE << "chunk parsing error");
+        in.dechunkingState = chunkError;
+    }
+    return false; // error, unsupported, or done
+}
+
 char *
 ConnStateData::In::addressToReadInto() const
 {
     return buf + notYetUsed;
 }
 
-ConnStateData::In::In() : buf (NULL), notYetUsed (0), allocatedSize (0)
+ConnStateData::In::In() : bodyParser(NULL),
+    buf (NULL), notYetUsed (0), allocatedSize (0),
+    dechunkingState(ConnStateData::chunkUnknown)
 {}
 
 ConnStateData::In::~In()
 {
     if (allocatedSize)
         memFreeBuf(allocatedSize, buf);
+    if (bodyParser)
+        delete bodyParser; // TODO: pool
 }
 
 /* This is a comm call normally scheduled by comm_close() */

=== modified file 'src/client_side.h'
--- src/client_side.h	2009-02-19 07:17:31 +0000
+++ src/client_side.h	2009-07-07 21:16:30 +0000
@@ -48,6 +48,9 @@
 
 class AuthUserRequest;
 
+class ChunkedCodingParser;
+class HttpParser;
+
 template <class T>
 class Range;
 
@@ -144,13 +147,22 @@
 
     int fd;
 
+    /// chunk buffering and parsing algorithm state
+    typedef enum { chunkUnknown, chunkNone, chunkParsing, chunkReady, chunkError } DechunkingState;
+
     struct In {
         In();
         ~In();
         char *addressToReadInto() const;
+
+        ChunkedCodingParser *bodyParser; ///< parses chunked request body
+        MemBuf chunked; ///< contains unparsed raw (chunked) body data
+        MemBuf dechunked; ///< accumulates parsed (dechunked) content
         char *buf;
         size_t notYetUsed;
         size_t allocatedSize;
+        size_t chunkedSeen; ///< size of processed or ignored raw read data
+        DechunkingState dechunkingState; ///< request dechunking state
     } in;
 
     int64_t bodySizeLeft();
@@ -253,6 +265,10 @@
     bool switchedToHttps() const { return false; }
 #endif
 
+    void startDechunkingRequest(HttpParser *hp);
+    bool parseRequestChunks(HttpParser *hp);
+    void finishDechunkingRequest(HttpParser *hp);
+
 private:
     int connReadWasError(comm_err_t flag, int size, int xerrno);
     int connFinishedWithConn(int size);

=== modified file 'src/structs.h'
--- src/structs.h	2009-06-28 08:03:28 +0000
+++ src/structs.h	2009-07-06 20:19:22 +0000
@@ -183,6 +183,7 @@
     } Timeout;
     size_t maxRequestHeaderSize;
     int64_t maxRequestBodySize;
+    int64_t maxChunkedRequestBodySize;
     size_t maxReplyHeaderSize;
     acl_size_t *ReplyBodySize;
 
