Hi all,
Alex asked me to separate the chunked request patch from the ICAP
access logging patch, as requested by Amos.
This is the chunked request patch developed by Alex.
Christos
=== modified file 'src/HttpMsg.cc'
--- src/HttpMsg.cc 2009-02-02 12:50:23 +0000
+++ src/HttpMsg.cc 2009-07-06 20:28:38 +0000
@@ -309,6 +309,14 @@
return -1;
}
+void
+HttpMsg::setContentLength(int64_t clen)
+{
+ header.delById(HDR_CONTENT_LENGTH); // if any
+ header.putInt64(HDR_CONTENT_LENGTH, clen);
+ content_length = clen;
+}
+
/* returns true if connection should be "persistent"
* after processing this message */
int
=== modified file 'src/HttpMsg.h'
--- src/HttpMsg.h 2009-01-21 03:47:47 +0000
+++ src/HttpMsg.h 2009-07-06 20:28:38 +0000
@@ -58,6 +58,9 @@
///< produce a message copy, except for a few connection-specific settings
virtual HttpMsg *clone() const = 0; ///< \todo rename: not a true copy?
+ /// [re]sets Content-Length header and cached value
+ void setContentLength(int64_t clen);
+
public:
HttpVersion http_ver;
@@ -109,7 +112,8 @@
};
/* Temporary parsing state; might turn into the replacement parser later on */
-struct _HttpParser {
+class HttpParser {
+public:
char state;
const char *buf;
int bufsiz;
@@ -120,7 +124,6 @@
int v_start, v_end;
int v_maj, v_min;
};
-typedef struct _HttpParser HttpParser;
extern void HttpParserInit(HttpParser *, const char *buf, int len);
extern int HttpParserParseReqLine(HttpParser *hp);
=== modified file 'src/cf.data.pre'
--- src/cf.data.pre 2009-06-28 11:47:27 +0000
+++ src/cf.data.pre 2009-07-06 20:19:22 +0000
@@ -3265,6 +3265,33 @@
be no limit imposed.
DOC_END
+NAME: chunked_request_body_max_size
+COMMENT: (bytes)
+TYPE: b_int64_t
+DEFAULT: 64 KB
+LOC: Config.maxChunkedRequestBodySize
+DOC_START
+ A broken or confused HTTP/1.1 client may send a chunked HTTP
+ request to Squid. Squid does not have full support for that
+ feature yet. To cope with such requests, Squid buffers the
+ entire request and then dechunks the request body to create a
+ plain HTTP/1.0 request with a known content length. The plain
+ request is then used by the rest of the Squid code as usual.
+
+ The option value specifies the maximum size of the buffer used
+ to hold the request before the conversion. If the chunked
+ request size exceeds the specified limit, the conversion
+ fails, and the client receives an "unsupported request" error,
+ as if dechunking were disabled.
+
+ Dechunking is enabled by default. To disable conversion of
+ chunked requests, set the maximum to zero.
+
+ The request dechunking feature, and this option in particular, are
+ a temporary hack. When chunked requests and responses are fully
+ supported, there will be no need to buffer a chunked request.
+DOC_END
+
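For reference, here is how the new directive might look in squid.conf
(the directive name and the 64 KB default come from this patch; the
256 KB value below is just an illustration):

	# buffer and dechunk chunked request bodies up to 256 KB
	# (set to 0 to disable dechunking entirely)
	chunked_request_body_max_size 256 KB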
NAME: broken_posts
IFDEF: HTTP_VIOLATIONS
TYPE: acl_access
=== modified file 'src/client_side.cc'
--- src/client_side.cc 2009-06-14 12:18:00 +0000
+++ src/client_side.cc 2009-07-07 20:39:04 +0000
@@ -103,6 +103,7 @@
#include "ClientRequestContext.h"
#include "MemBuf.h"
#include "SquidTime.h"
+#include "ChunkedCodingParser.h"
#if LINGERING_CLOSE
#define comm_close comm_lingering_close
@@ -1873,6 +1874,18 @@
}
}
+// Temporary hack helper: determines whether the request is chunked (expensive)
+static bool
+isChunkedRequest(const HttpParser *hp) {
+ HttpRequest request;
+ if (!request.parseHeader(HttpParserHdrBuf(hp), HttpParserHdrSz(hp)))
+ return false;
+
+ return request.header.has(HDR_TRANSFER_ENCODING) &&
+ request.header.hasListMember(HDR_TRANSFER_ENCODING, "chunked", ',');
+}
+
+
/**
* parseHttpRequest()
*
@@ -1885,7 +1898,6 @@
static ClientSocketContext *
parseHttpRequest(ConnStateData *conn, HttpParser *hp, HttpRequestMethod * method_p, HttpVersion *http_ver)
{
- char *url = NULL;
char *req_hdr = NULL;
char *end;
size_t req_sz;
@@ -1952,17 +1964,6 @@
return parseHttpRequestAbort(conn, "error:unsupported-request-method");
}
- /* set url */
- /*
- * XXX this should eventually not use a malloc'ed buffer; the transformation code
- * below needs to be modified to not expect a mutable nul-terminated string.
- */
- url = (char *)xmalloc(hp->u_end - hp->u_start + 16);
-
- memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1);
-
- url[hp->u_end - hp->u_start + 1] = '\0';
-
/*
* Process headers after request line
* TODO: Use httpRequestParse here.
@@ -1982,7 +1983,6 @@
*/
if ( squid_strnstr(req_hdr, "\r\r\n", req_sz) ) {
debugs(33, 1, "WARNING: suspicious HTTP request contains double CR");
- xfree(url);
return parseHttpRequestAbort(conn, "error:double-CR");
}
@@ -1990,6 +1990,35 @@
(int) HttpParserRequestLen(hp) << ", req_line_sz = " <<
HttpParserReqSz(hp));
+ // Temporary hack: We might receive a chunked body from a broken HTTP/1.1
+ // client that sends chunked requests to HTTP/1.0 Squid. If the request
+ // might have a chunked body, parse the headers early to look for the
+ // "Transfer-Encoding: chunked" header. If we find it, wait until the
+ // entire body is available so that we can set the content length and
+ // forward the request without chunks. The primary reason for this is
+ // to avoid forwarding a chunked request because the server side lacks
+ // logic to determine when it is valid to do so.
+ // FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS below will replace this hack.
+ if (hp->v_min == 1 && hp->v_maj == 1 && // broken client, may send chunks
+ Config.maxChunkedRequestBodySize > 0 && // configured to dechunk
+ (*method_p == METHOD_PUT || *method_p == METHOD_POST)) {
+
+ // check only once per request because isChunkedRequest is expensive
+ if (conn->in.dechunkingState == ConnStateData::chunkUnknown) {
+ if (isChunkedRequest(hp))
+ conn->startDechunkingRequest(hp);
+ else
+ conn->in.dechunkingState = ConnStateData::chunkNone;
+ }
+
+ if (conn->in.dechunkingState == ConnStateData::chunkParsing) {
+ if (conn->parseRequestChunks(hp)) // parses newly read chunks
+ return NULL; // wait for more data
+ debugs(33, 5, HERE << "Got complete chunked request or err.");
+ assert(conn->in.dechunkingState != ConnStateData::chunkParsing);
+ }
+ }
+
/* Ok, all headers are received */
http = new ClientHttpRequest(conn);
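For reviewers, a minimal standalone C++ model of the dechunking state
machine driven by the hack above. Only the DechunkingState names come
from this patch (client_side.h); step(), ParseOutcome, and main() are
hypothetical illustration:

#include <cassert>

// States from client_side.h; transitions mirror parseHttpRequest() above
// plus parseRequestChunks()/finishDechunkingRequest() later in the patch.
enum DechunkingState { chunkUnknown, chunkNone, chunkParsing, chunkReady, chunkError };
enum ParseOutcome { NeedMore, Done, Error };

static DechunkingState step(DechunkingState s, bool looksChunked, ParseOutcome out)
{
    if (s == chunkUnknown)                // first pass: sniff parsed headers
        return looksChunked ? chunkParsing : chunkNone;
    if (s == chunkParsing) {              // parseRequestChunks() outcome
        if (out == NeedMore)
            return chunkParsing;          // parseHttpRequest() returns NULL
        return out == Done ? chunkReady : chunkError;
    }
    return s; // chunkReady is reset to chunkUnknown by finishDechunkingRequest()
}

int main()
{
    DechunkingState s = chunkUnknown;
    s = step(s, true, NeedMore); assert(s == chunkParsing); // start dechunking
    s = step(s, true, NeedMore); assert(s == chunkParsing); // still buffering body
    s = step(s, true, Done);     assert(s == chunkReady);   // whole body received
    return 0;
}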
@@ -2006,6 +2035,17 @@
debugs(33, 5, "parseHttpRequest: Request Header is\n" <<(hp->buf) + hp->hdr_start);
+ /* set url */
+ /*
+ * XXX this should eventually not use a malloc'ed buffer; the transformation code
+ * below needs to be modified to not expect a mutable nul-terminated string.
+ */
+ char *url = (char *)xmalloc(hp->u_end - hp->u_start + 16);
+
+ memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1);
+
+ url[hp->u_end - hp->u_start + 1] = '\0';
+
#if THIS_VIOLATES_HTTP_SPECS_ON_URL_TRANSFORMATION
if ((t = strchr(url, '#'))) /* remove HTML anchors */
@@ -2157,6 +2197,11 @@
int
connKeepReadingIncompleteRequest(ConnStateData * conn)
{
+ // when we read chunked requests, the entire body is buffered
+ // XXX: this check ignores header size and its limits.
+ if (conn->in.dechunkingState == ConnStateData::chunkParsing)
+ return conn->in.notYetUsed < Config.maxChunkedRequestBodySize;
+
return conn->in.notYetUsed >= Config.maxRequestHeaderSize ? 0 : 1;
}
@@ -2166,8 +2211,13 @@
ClientSocketContext *context = parseHttpRequestAbort(conn, "error:request-too-large");
clientStreamNode *node = context->getClientReplyContext();
assert(!connKeepReadingIncompleteRequest(conn));
- debugs(33, 1, "Request header is too large (" << conn->in.notYetUsed << " bytes)");
- debugs(33, 1, "Config 'request_header_max_size'= " << Config.maxRequestHeaderSize << " bytes.");
+ if (conn->in.dechunkingState == ConnStateData::chunkParsing) {
+ debugs(33, 1, "Chunked request is too large (" << conn->in.notYetUsed << " bytes)");
+ debugs(33, 1, "Config 'chunked_request_body_max_size'= " << Config.maxChunkedRequestBodySize << " bytes.");
+ } else {
+ debugs(33, 1, "Request header is too large (" << conn->in.notYetUsed << " bytes)");
+ debugs(33, 1, "Config 'request_header_max_size'= " << Config.maxRequestHeaderSize << " bytes.");
+ }
clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
assert (repContext);
repContext->setReplyToError(ERR_TOO_BIG,
@@ -2213,6 +2263,9 @@
ClientHttpRequest *http = context->http;
HttpRequest *request = NULL;
bool notedUseOfBuffer = false;
+ bool tePresent = false;
+ bool deChunked = false;
+ bool unsupportedTe = false;
/* We have an initial client stream in place should it be needed */
/* setup our private context */
@@ -2312,8 +2365,17 @@
request->my_addr = conn->me;
request->http_ver = http_ver;
- if (!urlCheckRequest(request) ||
- request->header.has(HDR_TRANSFER_ENCODING)) {
+ tePresent = request->header.has(HDR_TRANSFER_ENCODING);
+ deChunked = conn->in.dechunkingState == ConnStateData::chunkReady;
+ if (deChunked) {
+ assert(tePresent);
+ request->setContentLength(conn->in.dechunked.contentSize());
+ request->header.delById(HDR_TRANSFER_ENCODING);
+ conn->finishDechunkingRequest(hp);
+ }
+
+ unsupportedTe = tePresent && !deChunked;
+ if (!urlCheckRequest(request) || unsupportedTe) {
clientStreamNode *node = context->getClientReplyContext();
clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
assert (repContext);
@@ -2647,13 +2709,73 @@
{
assert(bodyPipe != NULL);
- if (const size_t putSize = bodyPipe->putMoreData(in.buf, in.notYetUsed))
+ size_t putSize = 0;
+
+#if FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS
+ // The code below works, in principle, but we cannot do dechunking
+ // on-the-fly because that would mean sending chunked requests to
+ // the next hop. Squid lacks logic to determine which servers can
+ // receive chunk requests.
+ // The error generation code probably needs more work.
+ if (in.bodyParser) { // chunked body
+ debugs(33,5, HERE << "handling chunked request body for FD " << fd);
+ bool malformedChunks = false;
+
+ MemBuf raw; // ChunkedCodingParser only works with MemBufs
+ raw.init(in.notYetUsed, in.notYetUsed);
+ raw.append(in.buf, in.notYetUsed);
+ try { // the parser will throw on errors
+ const mb_size_t wasContentSize = raw.contentSize();
+ BodyPipeCheckout bpc(*bodyPipe);
+ const bool parsed = in.bodyParser->parse(&raw, &bpc.buf);
+ bpc.checkIn();
+ putSize = wasContentSize - raw.contentSize();
+
+ if (parsed) {
+ stopProducingFor(bodyPipe, true); // this makes bodySize known
+ } else {
+ // parser needy state must imply body pipe needy state
+ if (in.bodyParser->needsMoreData() &&
+ !bodyPipe->mayNeedMoreData())
+ malformedChunks = true;
+ // XXX: if bodyParser->needsMoreSpace, how can we guarantee it?
+ }
+ } catch (...) { // XXX: be more specific
+ malformedChunks = true;
+ }
+
+ if (malformedChunks) {
+ if (bodyPipe != NULL)
+ stopProducingFor(bodyPipe, false);
+
+ ClientSocketContext::Pointer context = getCurrentContext();
+ if (!context->http->out.offset) {
+ clientStreamNode *node = context->getClientReplyContext();
+ clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
+ assert (repContext);
+ repContext->setReplyToError(ERR_INVALID_REQ, HTTP_BAD_REQUEST,
+ METHOD_NONE, NULL, &peer.sin_addr,
+ NULL, NULL, NULL);
+ context->pullData();
+ }
+ flags.readMoreRequests = false;
+ return; // XXX: is that sufficient to generate an error?
+ }
+ } else // identity encoding
+#endif
+ {
+ debugs(33,5, HERE << "handling plain request body for FD " << fd);
+ putSize = bodyPipe->putMoreData(in.buf, in.notYetUsed);
+ if (!bodyPipe->mayNeedMoreData()) {
+ // BodyPipe will clear us automagically when we produced everything
+ bodyPipe = NULL;
+ }
+ }
+
+ if (putSize > 0)
connNoteUseOfBuffer(this, putSize);
- if (!bodyPipe->mayNeedMoreData()) {
- // BodyPipe will clear us automagically when we produced everything
- bodyPipe = NULL;
-
+ if (!bodyPipe) {
debugs(33,5, HERE << "produced entire request body for FD " << fd);
if (closing()) {
@@ -3434,19 +3556,121 @@
bodyPipe->enableAutoConsumption();
}
+// initialize dechunking state
+void
+ConnStateData::startDechunkingRequest(HttpParser *hp)
+{
+ debugs(33, 5, HERE << "start dechunking at " << HttpParserRequestLen(hp));
+ assert(in.dechunkingState == chunkUnknown);
+ assert(!in.bodyParser);
+ in.bodyParser = new ChunkedCodingParser;
+ in.chunkedSeen = HttpParserRequestLen(hp); // skip headers when dechunking
+ in.chunked.init(); // TODO: should we have a smaller-than-default limit?
+ in.dechunked.init();
+ in.dechunkingState = chunkParsing;
+}
+
+// put parsed content into input buffer and clean up
+void
+ConnStateData::finishDechunkingRequest(HttpParser *hp)
+{
+ debugs(33, 5, HERE << "finish dechunking; content: " << in.dechunked.contentSize());
+
+ assert(in.dechunkingState == chunkReady);
+ assert(in.bodyParser);
+ delete in.bodyParser;
+ in.bodyParser = NULL;
+
+ const mb_size_t headerSize = HttpParserRequestLen(hp);
+
+ // dechunking cannot make data bigger
+ assert(headerSize + in.dechunked.contentSize() + in.chunked.contentSize()
+ <= static_cast<mb_size_t>(in.notYetUsed));
+ assert(in.notYetUsed <= in.allocatedSize);
+
+ // copy dechunked content
+ char *end = in.buf + headerSize;
+ xmemmove(end, in.dechunked.content(), in.dechunked.contentSize());
+ end += in.dechunked.contentSize();
+
+ // copy post-chunk leftovers, if any (e.g., caused by request pipelining)
+ if (in.chunked.contentSize()) {
+ xmemmove(end, in.chunked.content(), in.chunked.contentSize());
+ end += in.chunked.contentSize();
+ }
+
+ in.notYetUsed = end - in.buf;
+
+ in.chunked.clean();
+ in.dechunked.clean();
+ in.dechunkingState = chunkUnknown;
+}
+
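To visualize the in-place buffer rewrite in finishDechunkingRequest()
above, here is a standalone sketch using hypothetical byte strings
(plain C arrays, not Squid's MemBuf or in.buf); it mimics the two
xmemmove() calls that overwrite the raw chunked body with the dechunked
content and preserve any pipelined leftovers:

#include <cstddef>
#include <cstdio>
#include <cstring>

int main()
{
    // in.buf analogue: headers + raw chunked body + pipelined leftover
    char buf[64] = "POST / HTTP/1.1\r\n\r\n5\r\nhello\r\n0\r\n\r\nGET";
    const size_t headerSize = 19;      // through the blank line (HttpParserRequestLen)
    const char dechunked[] = "hello";  // in.dechunked analogue (parsed body)
    const char leftover[]  = "GET";    // in.chunked analogue (unparsed remainder)

    char *end = buf + headerSize;
    memmove(end, dechunked, strlen(dechunked)); // plain body replaces chunks
    end += strlen(dechunked);
    memmove(end, leftover, strlen(leftover));   // keep bytes for the next request
    end += strlen(leftover);

    // the in.notYetUsed analogue is now end - buf
    printf("%.*s\n", (int)(end - buf), buf);    // POST / HTTP/1.1 ... helloGET
    return 0;
}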
+// parse newly read request chunks and buffer them for finishDechunkingRequest
+// returns true iff it needs more data
+bool
+ConnStateData::parseRequestChunks(HttpParser *)
+{
+ debugs(33,5, HERE << "parsing chunked request body at " <<
+ in.chunkedSeen << " < " << in.notYetUsed);
+ assert(in.bodyParser);
+ assert(in.dechunkingState == chunkParsing);
+
+ assert(in.chunkedSeen <= in.notYetUsed);
+ const mb_size_t fresh = in.notYetUsed - in.chunkedSeen;
+
+ // be safe: count some chunked coding metadata towards the total body size
+ if (fresh + in.dechunked.contentSize() > Config.maxChunkedRequestBodySize) {
+ debugs(33,3, HERE << "chunked body (" << fresh << " + " <<
+ in.dechunked.contentSize() << ") may exceed " <<
+ "chunked_request_body_max_size=" <<
+ Config.maxChunkedRequestBodySize);
+ in.dechunkingState = chunkError;
+ return false;
+ }
+
+ if (fresh > in.chunked.potentialSpaceSize()) {
+ // should not happen if Config.maxChunkedRequestBodySize is reasonable
+ debugs(33,1, HERE << "chunked_request_body_max_size exceeds chunked buffer " <<
+ "size: " << fresh << " + " << in.chunked.contentSize() << " > " <<
+ in.chunked.potentialSpaceSize() << " with " <<
+ "chunked_request_body_max_size=" <<
+ Config.maxChunkedRequestBodySize);
+ in.dechunkingState = chunkError;
+ return false;
+ }
+ in.chunked.append(in.buf + in.chunkedSeen, fresh);
+ in.chunkedSeen += fresh;
+
+ try { // the parser will throw on errors
+ if (in.bodyParser->parse(&in.chunked, &in.dechunked))
+ in.dechunkingState = chunkReady; // successfully parsed all chunks
+ else
+ return true; // need more, keep the same state
+ } catch (...) {
+ debugs(33,3, HERE << "chunk parsing error");
+ in.dechunkingState = chunkError;
+ }
+ return false; // error, unsupported, or done
+}
+
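And a tiny standalone model of the in.chunkedSeen bookkeeping used by
parseRequestChunks() above; the In struct here is a hypothetical
stand-in for the real one, showing why each call feeds only freshly
read bytes to the chunk parser:

#include <cassert>
#include <cstddef>

struct In { size_t notYetUsed; size_t chunkedSeen; };

static size_t freshBytes(In &in)
{
    assert(in.chunkedSeen <= in.notYetUsed);
    const size_t fresh = in.notYetUsed - in.chunkedSeen;
    in.chunkedSeen += fresh; // everything read so far is now "seen"
    return fresh;            // only these bytes go to the chunk parser
}

int main()
{
    In in = { 0, 0 };
    in.notYetUsed = 100; assert(freshBytes(in) == 100); // first network read
    in.notYetUsed = 160; assert(freshBytes(in) == 60);  // only the new 60 bytes
    assert(freshBytes(in) == 0);                        // no new data yet
    return 0;
}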
char *
ConnStateData::In::addressToReadInto() const
{
return buf + notYetUsed;
}
-ConnStateData::In::In() : buf (NULL), notYetUsed (0), allocatedSize (0)
+ConnStateData::In::In() : bodyParser(NULL),
+ buf (NULL), notYetUsed (0), allocatedSize (0),
+ dechunkingState(ConnStateData::chunkUnknown)
{}
ConnStateData::In::~In()
{
if (allocatedSize)
memFreeBuf(allocatedSize, buf);
+ delete bodyParser; // deleting NULL is safe; TODO: pool
}
/* This is a comm call normally scheduled by comm_close() */
=== modified file 'src/client_side.h'
--- src/client_side.h 2009-02-19 07:17:31 +0000
+++ src/client_side.h 2009-07-07 21:16:30 +0000
@@ -48,6 +48,9 @@
class AuthUserRequest;
+class ChunkedCodingParser;
+class HttpParser;
+
template <class T>
class Range;
@@ -144,13 +147,22 @@
int fd;
+ /// chunk buffering and parsing algorithm state
+ typedef enum { chunkUnknown, chunkNone, chunkParsing, chunkReady, chunkError } DechunkingState;
+
struct In {
In();
~In();
char *addressToReadInto() const;
+
+ ChunkedCodingParser *bodyParser; ///< parses chunked request body
+ MemBuf chunked; ///< contains unparsed raw (chunked) body data
+ MemBuf dechunked; ///< accumulates parsed (dechunked) content
char *buf;
size_t notYetUsed;
size_t allocatedSize;
+ size_t chunkedSeen; ///< size of processed or ignored raw read data
+ DechunkingState dechunkingState; ///< request dechunking state
} in;
int64_t bodySizeLeft();
@@ -253,6 +265,10 @@
bool switchedToHttps() const { return false; }
#endif
+ void startDechunkingRequest(HttpParser *hp);
+ bool parseRequestChunks(HttpParser *hp);
+ void finishDechunkingRequest(HttpParser *hp);
+
private:
int connReadWasError(comm_err_t flag, int size, int xerrno);
int connFinishedWithConn(int size);
=== modified file 'src/structs.h'
--- src/structs.h 2009-06-28 08:03:28 +0000
+++ src/structs.h 2009-07-06 20:19:22 +0000
@@ -183,6 +183,7 @@
} Timeout;
size_t maxRequestHeaderSize;
int64_t maxRequestBodySize;
+ int64_t maxChunkedRequestBodySize;
size_t maxReplyHeaderSize;
acl_size_t *ReplyBodySize;