This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch 9.0.x in repository https://gitbox.apache.org/repos/asf/tomcat.git
commit f75509c4439dacf21b065c8d2965169dab5d50bf Author: Mark Thomas <ma...@apache.org> AuthorDate: Fri Apr 26 15:58:37 2024 +0100 Refactor chunked input filter to use common HTTP header/trailer parser This adds non-blocking read support for chunked request bodies --- java/org/apache/coyote/http11/Http11Processor.java | 2 +- .../coyote/http11/filters/ChunkedInputFilter.java | 576 ++++++++------------- webapps/docs/changelog.xml | 3 + 3 files changed, 233 insertions(+), 348 deletions(-) diff --git a/java/org/apache/coyote/http11/Http11Processor.java b/java/org/apache/coyote/http11/Http11Processor.java index 7c5ca6176e..37afea55fe 100644 --- a/java/org/apache/coyote/http11/Http11Processor.java +++ b/java/org/apache/coyote/http11/Http11Processor.java @@ -168,7 +168,7 @@ public class Http11Processor extends AbstractProcessor { // Create and add the chunked filters. inputBuffer.addFilter( - new ChunkedInputFilter(protocol.getMaxTrailerSize(), protocol.getAllowedTrailerHeadersInternal(), + new ChunkedInputFilter(request, protocol.getMaxTrailerSize(), protocol.getAllowedTrailerHeadersInternal(), protocol.getMaxExtensionSize(), protocol.getMaxSwallowSize())); outputBuffer.addFilter(new ChunkedOutputFilter()); diff --git a/java/org/apache/coyote/http11/filters/ChunkedInputFilter.java b/java/org/apache/coyote/http11/filters/ChunkedInputFilter.java index 741cd1078b..f350487771 100644 --- a/java/org/apache/coyote/http11/filters/ChunkedInputFilter.java +++ b/java/org/apache/coyote/http11/filters/ChunkedInputFilter.java @@ -16,11 +16,9 @@ */ package org.apache.coyote.http11.filters; -import java.io.EOFException; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.util.Locale; import java.util.Set; import org.apache.coyote.ActionCode; @@ -31,8 +29,9 @@ import org.apache.coyote.http11.Constants; import org.apache.coyote.http11.InputFilter; import org.apache.tomcat.util.buf.ByteChunk; import 
org.apache.tomcat.util.buf.HexUtils; -import org.apache.tomcat.util.http.MimeHeaders; -import org.apache.tomcat.util.http.parser.HttpParser; +import org.apache.tomcat.util.http.parser.HttpHeaderParser; +import org.apache.tomcat.util.http.parser.HttpHeaderParser.HeaderDataSource; +import org.apache.tomcat.util.http.parser.HttpHeaderParser.HeaderParseStatus; import org.apache.tomcat.util.net.ApplicationBufferHandler; import org.apache.tomcat.util.res.StringManager; @@ -42,7 +41,7 @@ import org.apache.tomcat.util.res.StringManager; * * @author Remy Maucherat */ -public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler { +public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler, HeaderDataSource { private static final StringManager sm = StringManager.getManager(ChunkedInputFilter.class); @@ -82,28 +81,15 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler /** - * Flag set to true when the end chunk has been read. + * Buffer used to store trailing headers. Is normally in read mode. */ - protected boolean endChunk = false; - - - /** - * Byte chunk used to store trailing headers. - */ - protected final ByteChunk trailingHeaders = new ByteChunk(); - - - /** - * Flag set to true if the next call to doRead() must parse a CRLF pair - * before doing anything else. - */ - protected boolean needCRLFParse = false; + protected final ByteBuffer trailingHeaders; /** * Request being parsed. */ - private Request request; + private final Request request; /** @@ -112,38 +98,31 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler private final long maxExtensionSize; - /** - * Limit for trailer size. - */ - private final int maxTrailerSize; - - - /** - * Size of extensions processed for this request. - */ - private long extensionSize; - - private final int maxSwallowSize; + private final Set<String> allowedTrailerHeaders; - /** - * Flag that indicates if an error has occurred. 
+ /* + * Parsing state. */ - private boolean error; - - - private final Set<String> allowedTrailerHeaders; + private volatile ParseState parseState = ParseState.CHUNK_HEADER; + private volatile boolean crFound = false; + private volatile int chunkSizeDigitsRead = 0; + private volatile boolean parsingExtension = false; + private volatile long extensionSize; + private final HttpHeaderParser httpHeaderParser; // ----------------------------------------------------------- Constructors - public ChunkedInputFilter(int maxTrailerSize, Set<String> allowedTrailerHeaders, + public ChunkedInputFilter(final Request request, int maxTrailerSize, Set<String> allowedTrailerHeaders, int maxExtensionSize, int maxSwallowSize) { - this.trailingHeaders.setLimit(maxTrailerSize); + this.request = request; + this.trailingHeaders = ByteBuffer.allocate(maxTrailerSize); + this.trailingHeaders.limit(0); this.allowedTrailerHeaders = allowedTrailerHeaders; this.maxExtensionSize = maxExtensionSize; - this.maxTrailerSize = maxTrailerSize; this.maxSwallowSize = maxSwallowSize; + this.httpHeaderParser = new HttpHeaderParser(this, request.getMimeTrailerFields(), false); } @@ -151,75 +130,38 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler @Override public int doRead(ApplicationBufferHandler handler) throws IOException { - if (endChunk) { - return -1; - } - - checkError(); - - if(needCRLFParse) { - needCRLFParse = false; - parseCRLF(false); - } - - if (remaining <= 0) { - if (!parseChunkHeader()) { - throwBadRequestException(sm.getString("chunkedInputFilter.invalidHeader")); - } - if (endChunk) { - parseEndChunk(); - if (!request.isRequestThread()) { + while (true) { + switch (parseState) { + case CHUNK_HEADER: + if (!parseChunkHeader()) { + return 0; + } + break; + case CHUNK_BODY: + return parseChunkBody(handler); + case CHUNK_BODY_CRLF: + if (!parseCRLF()) { + return 0; + } + parseState = ParseState.CHUNK_HEADER; + break; + case TRAILER_FIELDS: + if 
(!parseTrailerFields()) { + return 0; + } /* - * Perform the dispatch back to the container for the onAllDataRead() event. For non-chunked input - * this would be performed when isReady() is next called. - * - * Chunked input returns one chunk at a time for non-blocking reads. A consequence of this is that - * reading the final chunk returns -1 which signals the end of stream. The application code reading - * the request body probably won't call isReady() after receiving the -1 return value since it - * already knows it is at end of stream. Therefore we trigger the dispatch back to the container - * here which in turn ensures the onAllDataRead() event is fired. + * If on a non-container thread the dispatch in parseTrailerFields() will have triggered the + * recycling of this filter so return -1 here else the non-container thread may try and continue + * reading. */ - request.action(ActionCode.DISPATCH_READ, null); - request.action(ActionCode.DISPATCH_EXECUTE, null); - } - return -1; + return -1; + case FINISHED: + return -1; + case ERROR: + default: + throw new IOException(sm.getString("chunkedInputFilter.error")); } } - - int result = 0; - - if (readChunk == null || readChunk.position() >= readChunk.limit()) { - if (readBytes() < 0) { - throwEOFException(sm.getString("chunkedInputFilter.eos")); - } - } - - if (remaining > readChunk.remaining()) { - result = readChunk.remaining(); - remaining = remaining - result; - if (readChunk != handler.getByteBuffer()) { - handler.setByteBuffer(readChunk.duplicate()); - } - readChunk.position(readChunk.limit()); - } else { - result = remaining; - if (readChunk != handler.getByteBuffer()) { - handler.setByteBuffer(readChunk.duplicate()); - handler.getByteBuffer().limit(readChunk.position() + remaining); - } - readChunk.position(readChunk.position() + remaining); - remaining = 0; - //we need a CRLF - if ((readChunk.position() + 1) >= readChunk.limit()) { - //if we call parseCRLF we overrun the buffer here - //so we defer it to 
the next call BZ 11117 - needCRLFParse = true; - } else { - parseCRLF(false); //parse the CRLF immediately - } - } - - return result; } @@ -230,7 +172,7 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler */ @Override public void setRequest(Request request) { - this.request = request; + // NO-OP - Request is fixed and passed to constructor. } @@ -248,7 +190,10 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler throwBadRequestException(sm.getString("inputFilter.maxSwallow")); } } - + // Excess bytes copied to trailingHeaders need to be restored in readChunk for the next request. + if (trailingHeaders.remaining() > 0) { + readChunk.position(readChunk.position() - trailingHeaders.remaining()); + } // Return the number of extra bytes which were consumed return readChunk.remaining(); } @@ -290,12 +235,14 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler if (readChunk != null) { readChunk.position(0).limit(0); } - endChunk = false; - needCRLFParse = false; - trailingHeaders.recycle(); - trailingHeaders.setLimit(maxTrailerSize); + trailingHeaders.clear(); + trailingHeaders.limit(0); + parseState = ParseState.CHUNK_HEADER; + crFound = false; + chunkSizeDigitsRead = 0; + parsingExtension = false; extensionSize = 0; - error = false; + httpHeaderParser.recycle(); } @@ -307,7 +254,7 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler @Override public boolean isFinished() { - return endChunk; + return parseState == ParseState.FINISHED; } @@ -323,6 +270,55 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler } + @Override + public boolean fillHeaderBuffer() throws IOException { + // fill() automatically detects if blocking or non-blocking IO should be used + if (fill()) { + if (trailingHeaders.position() == trailingHeaders.capacity()) { + // At maxTrailerSize. Any further read will exceed maxTrailerSize. 
+ throw new BadRequestException(sm.getString("chunkedInputFilter.maxTrailer")); + } + + // Configure trailing headers for appending additional data + int originalPos = trailingHeaders.position(); + trailingHeaders.position(trailingHeaders.limit()); + trailingHeaders.limit(trailingHeaders.capacity()); + + if (readChunk.remaining() > trailingHeaders.remaining()) { + // readChunk has more data available than trailingHeaders has space so adjust limit + int originalLimit = readChunk.limit(); + readChunk.limit(readChunk.position() + trailingHeaders.remaining()); + trailingHeaders.put(readChunk); + readChunk.limit(originalLimit); + } else { + // readChunk has less data than trailingHeaders has remaining so no need to adjust limit. + trailingHeaders.put(readChunk); + } + + // Configure trailing headers for reading + trailingHeaders.limit(trailingHeaders.position()); + trailingHeaders.position(originalPos); + return true; + } + return false; + } + + + private boolean fill() throws IOException { + if (readChunk == null || readChunk.position() >= readChunk.limit()) { + // Automatically detects if blocking or non-blocking should be used + int read = readBytes(); + if (read < 0) { + // Unexpected end of stream + throwBadRequestException(sm.getString("chunkedInputFilter.invalidHeader")); + } else if (read == 0) { + return false; + } + } + return true; + } + + /** * Parse the header of a chunk. * A chunk header can look like one of the following:<br> @@ -333,45 +329,42 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler * The letters before CRLF or ';' (whatever comes first) must be valid hex * digits. We should not parse F23IAMGONNAMESSTHISUP34CRLF as a valid * header according to the spec. - * @return <code>true</code> if the chunk header has been - * successfully parsed + * + * @return {@code true} if the read is complete or {@code false} if incomplete. Incomplete reads can only happen + * with non-blocking I/O. 
+ * * @throws IOException Read error */ - protected boolean parseChunkHeader() throws IOException { + private boolean parseChunkHeader() throws IOException { - int result = 0; boolean eol = false; - int readDigit = 0; - boolean extension = false; while (!eol) { - - if (readChunk == null || readChunk.position() >= readChunk.limit()) { - if (readBytes() <= 0) { - return false; - } + if (!fill()) { + return false; } byte chr = readChunk.get(readChunk.position()); if (chr == Constants.CR || chr == Constants.LF) { - parseCRLF(false); + parsingExtension = false; + if (!parseCRLF()) { + return false; + } eol = true; - } else if (chr == Constants.SEMI_COLON && !extension) { + } else if (chr == Constants.SEMI_COLON && !parsingExtension) { // First semi-colon marks the start of the extension. Further // semi-colons may appear to separate multiple chunk-extensions. // These need to be processed as part of parsing the extensions. - extension = true; + parsingExtension = true; extensionSize++; - } else if (!extension) { - //don't read data after the trailer + } else if (!parsingExtension) { int charValue = HexUtils.getDec(chr); - if (charValue != -1 && readDigit < 8) { - readDigit++; - result = (result << 4) | charValue; + if (charValue != -1 && chunkSizeDigitsRead < 8) { + chunkSizeDigitsRead++; + remaining = (remaining << 4) | charValue; } else { - //we shouldn't allow invalid, non hex characters - //in the chunked header - return false; + // Isn't valid hex so this is an error condition + throwBadRequestException(sm.getString("chunkedInputFilter.invalidHeader")); } } else { // Extension 'parsing' @@ -389,47 +382,77 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler } } - if (readDigit == 0 || result < 0) { - return false; + if (chunkSizeDigitsRead == 0 || remaining < 0) { + throwBadRequestException(sm.getString("chunkedInputFilter.invalidHeader")); + } else { + chunkSizeDigitsRead = 0; } - if (result == 0) { - endChunk = true; + if (remaining 
== 0) { + parseState = ParseState.TRAILER_FIELDS; + } else { + parseState = ParseState.CHUNK_BODY; } - remaining = result; return true; } + private int parseChunkBody(ApplicationBufferHandler handler) throws IOException { + int result = 0; + + if (!fill()) { + return 0; + } + + if (remaining > readChunk.remaining()) { + result = readChunk.remaining(); + remaining = remaining - result; + if (readChunk != handler.getByteBuffer()) { + handler.setByteBuffer(readChunk.duplicate()); + } + readChunk.position(readChunk.limit()); + } else { + result = remaining; + if (readChunk != handler.getByteBuffer()) { + handler.setByteBuffer(readChunk.duplicate()); + handler.getByteBuffer().limit(readChunk.position() + remaining); + } + readChunk.position(readChunk.position() + remaining); + remaining = 0; + + parseState = ParseState.CHUNK_BODY_CRLF; + } + + return result; + } + + /** * Parse CRLF at end of chunk. * - * @param tolerant Should tolerant parsing (LF and CRLF) be used? This - * is recommended (RFC2616, section 19.3) for message - * headers. + * @return {@code true} if the read is complete or {@code false} if incomplete. Incomplete reads can only happen
+ * * @throws IOException An error occurred parsing CRLF */ - protected void parseCRLF(boolean tolerant) throws IOException { + private boolean parseCRLF() throws IOException { boolean eol = false; - boolean crfound = false; while (!eol) { - if (readChunk == null || readChunk.position() >= readChunk.limit()) { - if (readBytes() <= 0) { - throwBadRequestException(sm.getString("chunkedInputFilter.invalidCrlfNoData")); - } + if (!fill()) { + return false; } byte chr = readChunk.get(readChunk.position()); if (chr == Constants.CR) { - if (crfound) { + if (crFound) { throwBadRequestException(sm.getString("chunkedInputFilter.invalidCrlfCRCR")); } - crfound = true; + crFound = true; } else if (chr == Constants.LF) { - if (!tolerant && !crfound) { + if (!crFound) { throwBadRequestException(sm.getString("chunkedInputFilter.invalidCrlfNoCR")); } eol = true; @@ -439,218 +462,61 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler readChunk.position(readChunk.position() + 1); } + + crFound = false; + return true; } /** * Parse end chunk data. + * + * @return {@code true} if the read is complete or {@code false if incomplete}. In complete reads can only happen + * with non-blocking I/O. + * * @throws IOException Error propagation */ - protected void parseEndChunk() throws IOException { + private boolean parseTrailerFields() throws IOException { // Handle optional trailer headers - while (parseHeader()) { - // Loop until we run out of headers - } - } - - - private boolean parseHeader() throws IOException { - - /* - * Implementation note: Any changes to this method probably need to be echoed in - * Http11InputBuffer.parseHeader(). Why not use a common implementation? In short, this code uses blocking - * reads whereas Http11InputBuffer using non-blocking reads. The code is just different enough that a common - * implementation wasn't viewed as practical. 
- */ - - MimeHeaders headers = request.getMimeTrailerFields(); - - byte chr = 0; - - // Read new bytes if needed - if (readChunk == null || readChunk.position() >= readChunk.limit()) { - if (readBytes() <0) { - throwEOFException(sm.getString("chunkedInputFilter.eosTrailer")); - } - } - - // readBytes() above will set readChunk unless it returns a value < 0 - chr = readChunk.get(readChunk.position()); - - // CRLF terminates the request - if (chr == Constants.CR || chr == Constants.LF) { - parseCRLF(false); - return false; - } - - // Mark the current buffer position - int startPos = trailingHeaders.getEnd(); - - // - // Reading the header name - // Header name is always US-ASCII - // - - boolean colon = false; - while (!colon) { - - // Read new bytes if needed - if (readChunk == null || readChunk.position() >= readChunk.limit()) { - if (readBytes() <0) { - throwEOFException(sm.getString("chunkedInputFilter.eosTrailer")); - } + HeaderParseStatus status = HeaderParseStatus.HAVE_MORE_HEADERS; + do { + try { + status = httpHeaderParser.parseHeader(); + } catch (IllegalArgumentException iae) { + parseState = ParseState.ERROR; + throw new BadRequestException(iae); } - - // readBytes() above will set readChunk unless it returns a value < 0 - chr = readChunk.get(readChunk.position()); - if (chr >= Constants.A && chr <= Constants.Z) { - chr = (byte) (chr - Constants.LC_OFFSET); - } - - if (chr == Constants.COLON) { - colon = true; - } else if (!HttpParser.isToken(chr)) { - // Non-token characters are illegal in header names - throwBadRequestException(sm.getString("chunkedInputFilter.invalidTrailerHeaderName")); - } else if (trailingHeaders.getEnd() >= trailingHeaders.getLimit()) { - throwBadRequestException(sm.getString("chunkedInputFilter.maxTrailer")); - } else { - trailingHeaders.append(chr); + } while (status == HeaderParseStatus.HAVE_MORE_HEADERS); + if (status == HeaderParseStatus.DONE) { + parseState = ParseState.FINISHED; + 
request.getMimeTrailerFields().filter(allowedTrailerHeaders); + if (request.getReadListener() != null) { + /* + * Perform the dispatch back to the container for the onAllDataRead() event. For non-chunked input + * this would be performed when isReady() is next called. + * + * Chunked input returns one chunk at a time for non-blocking reads. A consequence of this is that + * reading the final chunk returns -1 which signals the end of stream. The application code reading + * the request body probably won't call isReady() after receiving the -1 return value since it + * already knows it is at end of stream. Therefore we trigger the dispatch back to the container + * here which in turn ensures the onAllDataRead() event is fired. + */ + request.action(ActionCode.DISPATCH_READ, null); + request.action(ActionCode.DISPATCH_EXECUTE, null); } - - readChunk.position(readChunk.position() + 1); - - } - int colonPos = trailingHeaders.getEnd(); - - // - // Reading the header value (which can be spanned over multiple lines) - // - - boolean eol = false; - boolean validLine = true; - int lastSignificantChar = 0; - - while (validLine) { - - boolean space = true; - - // Skipping spaces - while (space) { - - // Read new bytes if needed - if (readChunk == null || readChunk.position() >= readChunk.limit()) { - if (readBytes() <0) { - throwEOFException(sm.getString("chunkedInputFilter.eosTrailer")); - } - } - - chr = readChunk.get(readChunk.position()); - if (chr == Constants.SP || chr == Constants.HT) { - readChunk.position(readChunk.position() + 1); - // If we swallow whitespace, make sure it counts towards the - // limit placed on trailing header size - int newlimit = trailingHeaders.getLimit() -1; - if (trailingHeaders.getEnd() > newlimit) { - throwBadRequestException(sm.getString("chunkedInputFilter.maxTrailer")); - } - trailingHeaders.setLimit(newlimit); - } else { - space = false; - } - - } - - // Reading bytes until the end of the line - while (!eol) { - - // Read new bytes if 
needed - if (readChunk == null || readChunk.position() >= readChunk.limit()) { - if (readBytes() <0) { - throwEOFException(sm.getString("chunkedInputFilter.eosTrailer")); - } - } - - chr = readChunk.get(readChunk.position()); - if (chr == Constants.CR || chr == Constants.LF) { - parseCRLF(true); - eol = true; - } else if (HttpParser.isControl(chr) && chr != Constants.HT) { - throw new IOException(sm.getString("chunkedInputFilter.invalidTrailerHeaderValue")); - } else if (trailingHeaders.getEnd() >= trailingHeaders.getLimit()) { - throwBadRequestException(sm.getString("chunkedInputFilter.maxTrailer")); - } else if (chr == Constants.SP || chr == Constants.HT) { - trailingHeaders.append(chr); - } else { - trailingHeaders.append(chr); - lastSignificantChar = trailingHeaders.getEnd(); - } - - if (!eol) { - readChunk.position(readChunk.position() + 1); - } - } - - // Checking the first character of the new line. If the character - // is a LWS, then it's a multiline header - - // Read new bytes if needed - if (readChunk == null || readChunk.position() >= readChunk.limit()) { - if (readBytes() <0) { - throwEOFException(sm.getString("chunkedInputFilter.eosTrailer")); - } - } - - chr = readChunk.get(readChunk.position()); - if (chr != Constants.SP && chr != Constants.HT) { - validLine = false; - } else if (trailingHeaders.getEnd() >= trailingHeaders.getLimit()) { - throwBadRequestException(sm.getString("chunkedInputFilter.maxTrailer")); - } else { - eol = false; - // Copying one extra space in the buffer (since there must - // be at least one space inserted between the lines) - trailingHeaders.append(chr); - } - - } - - String headerName = new String(trailingHeaders.getBytes(), startPos, - colonPos - startPos, StandardCharsets.ISO_8859_1); - - headerName = headerName.toLowerCase(Locale.ENGLISH); - - if (allowedTrailerHeaders.contains(headerName)) { - - String value = new String(trailingHeaders.getBytes(), colonPos, - lastSignificantChar - colonPos, 
StandardCharsets.ISO_8859_1); - - headers.addValue(headerName).setString(value); + return true; + } else { + return false; } - - return true; } private void throwBadRequestException(String msg) throws IOException { - error = true; + parseState = ParseState.ERROR; throw new BadRequestException(msg); } - private void throwEOFException(String msg) throws IOException { - error = true; - throw new EOFException(msg); - } - - - private void checkError() throws IOException { - if (error) { - throw new IOException(sm.getString("chunkedInputFilter.error")); - } - } - - @Override public void setByteBuffer(ByteBuffer buffer) { readChunk = buffer; @@ -663,8 +529,24 @@ public class ChunkedInputFilter implements InputFilter, ApplicationBufferHandler } + @Override + public ByteBuffer getHeaderByteBuffer() { + return trailingHeaders; + } + + @Override public void expand(int size) { // no-op } + + + private enum ParseState { + CHUNK_HEADER, + CHUNK_BODY, + CHUNK_BODY_CRLF, + TRAILER_FIELDS, + FINISHED, + ERROR + } } diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml index fc69d83549..a944a64d85 100644 --- a/webapps/docs/changelog.xml +++ b/webapps/docs/changelog.xml @@ -178,6 +178,9 @@ Ensure that multiple instances of the same trailer field are handled correctly. (markt) </fix> + <fix> + Fix non-blocking reads of chunked request bodies. (markt) + </fix> </changelog> </subsection> <subsection name="Other"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org