This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tomcat.git
commit e5acf2cf0f745350c85d81532826d92b1882469a Author: Mark Thomas <ma...@apache.org> AuthorDate: Fri Apr 26 16:06:52 2024 +0100 Refactor HTTP header parsing to use common parser --- java/org/apache/coyote/http11/Constants.java | 12 + .../apache/coyote/http11/Http11InputBuffer.java | 375 ++------------------- .../apache/coyote/http11/LocalStrings.properties | 2 - .../coyote/http11/LocalStrings_fr.properties | 2 - .../coyote/http11/LocalStrings_ja.properties | 2 - .../coyote/http11/LocalStrings_ko.properties | 1 - .../coyote/http11/LocalStrings_zh_CN.properties | 1 - webapps/docs/changelog.xml | 3 + 8 files changed, 43 insertions(+), 355 deletions(-) diff --git a/java/org/apache/coyote/http11/Constants.java b/java/org/apache/coyote/http11/Constants.java index 700834c7ad..b580819b28 100644 --- a/java/org/apache/coyote/http11/Constants.java +++ b/java/org/apache/coyote/http11/Constants.java @@ -72,19 +72,28 @@ public final class Constants { /** * 'A'. + * + * @deprecated Unused. Will be removed in Tomcat 11. */ + @Deprecated public static final byte A = (byte) 'A'; /** * 'a'. + * + * @deprecated Unused. Will be removed in Tomcat 11. */ + @Deprecated public static final byte a = (byte) 'a'; /** * 'Z'. + * + * @deprecated Unused. Will be removed in Tomcat 11. */ + @Deprecated public static final byte Z = (byte) 'Z'; @@ -96,7 +105,10 @@ public final class Constants { /** * Lower case offset. + * + * @deprecated Unused. Will be removed in Tomcat 11. */ + @Deprecated public static final byte LC_OFFSET = A - a; diff --git a/java/org/apache/coyote/http11/Http11InputBuffer.java b/java/org/apache/coyote/http11/Http11InputBuffer.java index fea3b2fe46..c047a26701 100644 --- a/java/org/apache/coyote/http11/Http11InputBuffer.java +++ b/java/org/apache/coyote/http11/Http11InputBuffer.java @@ -27,9 +27,10 @@ import org.apache.coyote.InputBuffer; import org.apache.coyote.Request; import org.apache.juli.logging.Log; import org.apache.juli.logging.LogFactory; -import org.apache.tomcat.util.buf.MessageBytes; import org.apache.tomcat.util.http.HeaderUtil; -import org.apache.tomcat.util.http.MimeHeaders; +import org.apache.tomcat.util.http.parser.HttpHeaderParser; +import org.apache.tomcat.util.http.parser.HttpHeaderParser.HeaderDataSource; +import org.apache.tomcat.util.http.parser.HttpHeaderParser.HeaderParseStatus; import org.apache.tomcat.util.http.parser.HttpParser; import org.apache.tomcat.util.net.ApplicationBufferHandler; import org.apache.tomcat.util.net.SocketWrapperBase; @@ -38,7 +39,7 @@ import org.apache.tomcat.util.res.StringManager; /** * InputBuffer for HTTP that provides request header parsing as well as transfer encoding. */ -public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler { +public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler, HeaderDataSource { // -------------------------------------------------------------- Constants @@ -59,12 +60,6 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler private final Request request; - /** - * Headers of the associated request. - */ - private final MimeHeaders headers; - - /** * State. */ @@ -129,9 +124,8 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler private boolean parsingRequestLineEol = false; private int parsingRequestLineStart = 0; private int parsingRequestLineQPos = -1; - private HeaderParsePosition headerParsePos; - private final HeaderParseData headerData = new HeaderParseData(); private final HttpParser httpParser; + private final HttpHeaderParser httpHeaderParser; /** * Maximum allowed size of the HTTP request line plus headers plus any leading blank lines. @@ -149,7 +143,6 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler public Http11InputBuffer(Request request, int headerBufferSize, HttpParser httpParser) { this.request = request; - headers = request.getMimeHeaders(); this.headerBufferSize = headerBufferSize; this.httpParser = httpParser; @@ -158,13 +151,15 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler activeFilters = new InputFilter[0]; lastActiveFilter = -1; - parsingHeader = true; parsingRequestLine = true; parsingRequestLinePhase = 0; parsingRequestLineEol = false; parsingRequestLineStart = 0; parsingRequestLineQPos = -1; - headerParsePos = HeaderParsePosition.HEADER_START; + + parsingHeader = true; + httpHeaderParser = new HttpHeaderParser(this, request.getMimeHeaders(), true); + swallowInput = true; inputStreamInputBuffer = new SocketInputBuffer(); @@ -261,12 +256,11 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler chr = 0; prevChr = 0; - headerParsePos = HeaderParsePosition.HEADER_START; parsingRequestLinePhase = 0; parsingRequestLineEol = false; parsingRequestLineStart = 0; parsingRequestLineQPos = -1; - headerData.recycle(); + httpHeaderParser.recycle(); // Recycled last because they are volatile // All variables visible to this thread are guaranteed to be visible to // any other thread once that thread reads the same volatile. The first @@ -304,13 +298,12 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler parsingHeader = true; swallowInput = true; - headerParsePos = HeaderParsePosition.HEADER_START; parsingRequestLine = true; parsingRequestLinePhase = 0; parsingRequestLineEol = false; parsingRequestLineStart = 0; parsingRequestLineQPos = -1; - headerData.recycle(); + httpHeaderParser.recycle(); } @@ -584,7 +577,7 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler HeaderParseStatus status = HeaderParseStatus.HAVE_MORE_HEADERS; do { - status = parseHeader(); + status = httpHeaderParser.parseHeader(); // Checking that // (1) Headers plus request line size does not exceed its limit // (2) There are enough bytes to avoid expanding the buffer when @@ -741,12 +734,21 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler } - // --------------------------------------------------------- Private Methods + @Override + public boolean fillHeaderBuffer() throws IOException { + // HTTP headers are always read using non-blocking IO. + return fill(false); + } + /** * Attempts to read some data into the input buffer. * + * @param block Should blocking IO be used when filling the input buffer + * * @return <code>true</code> if more data was added to the input buffer otherwise <code>false</code> + * + * @throws IOException if an IO error occurs while filling the input buffer */ private boolean fill(boolean block) throws IOException { @@ -818,333 +820,6 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler } - /** - * Parse an HTTP header. - * - * @return One of {@link HeaderParseStatus#NEED_MORE_DATA}, {@link HeaderParseStatus#HAVE_MORE_HEADERS} or - * {@link HeaderParseStatus#DONE}. - */ - private HeaderParseStatus parseHeader() throws IOException { - - /* - * Implementation note: Any changes to this method probably need to be echoed in - * ChunkedInputFilter.parseHeader(). Why not use a common implementation? In short, this code uses non-blocking - * reads whereas ChunkedInputFilter using blocking reads. The code is just different enough that a common - * implementation wasn't viewed as practical. - */ - while (headerParsePos == HeaderParsePosition.HEADER_START) { - - // Read new bytes if needed - if (byteBuffer.position() >= byteBuffer.limit()) { - if (!fill(false)) { - return HeaderParseStatus.NEED_MORE_DATA; - } - } - - prevChr = chr; - chr = byteBuffer.get(); - - if (chr == Constants.CR && prevChr != Constants.CR) { - // Possible start of CRLF - process the next byte. - } else if (chr == Constants.LF) { - // CRLF or LF is an acceptable line terminator - return HeaderParseStatus.DONE; - } else { - if (prevChr == Constants.CR) { - // Must have read two bytes (first was CR, second was not LF) - byteBuffer.position(byteBuffer.position() - 2); - } else { - // Must have only read one byte - byteBuffer.position(byteBuffer.position() - 1); - } - break; - } - } - - if (headerParsePos == HeaderParsePosition.HEADER_START) { - // Mark the current buffer position - headerData.start = byteBuffer.position(); - headerData.lineStart = headerData.start; - headerParsePos = HeaderParsePosition.HEADER_NAME; - } - - // - // Reading the header name - // Header name is always US-ASCII - // - - while (headerParsePos == HeaderParsePosition.HEADER_NAME) { - - // Read new bytes if needed - if (byteBuffer.position() >= byteBuffer.limit()) { - if (!fill(false)) { // parse header - return HeaderParseStatus.NEED_MORE_DATA; - } - } - - int pos = byteBuffer.position(); - chr = byteBuffer.get(); - if (chr == Constants.COLON) { - if (headerData.start == pos) { - // Zero length header name - not valid. - // skipLine() will handle the error - return skipLine(); - } - headerParsePos = HeaderParsePosition.HEADER_VALUE_START; - headerData.headerValue = headers.addValue(byteBuffer.array(), headerData.start, pos - headerData.start); - pos = byteBuffer.position(); - // Mark the current buffer position - headerData.start = pos; - headerData.realPos = pos; - headerData.lastSignificantChar = pos; - break; - } else if (!HttpParser.isToken(chr)) { - // Non-token characters are illegal in header names - // Parsing continues so the error can be reported in context - headerData.lastSignificantChar = pos; - byteBuffer.position(byteBuffer.position() - 1); - // skipLine() will handle the error - return skipLine(); - } - - // chr is next byte of header name. Convert to lowercase. - if (chr >= Constants.A && chr <= Constants.Z) { - byteBuffer.put(pos, (byte) (chr - Constants.LC_OFFSET)); - } - } - - // Skip the line and ignore the header - if (headerParsePos == HeaderParsePosition.HEADER_SKIPLINE) { - return skipLine(); - } - - // - // Reading the header value (which can be spanned over multiple lines) - // - - while (headerParsePos == HeaderParsePosition.HEADER_VALUE_START || - headerParsePos == HeaderParsePosition.HEADER_VALUE || - headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) { - - if (headerParsePos == HeaderParsePosition.HEADER_VALUE_START) { - // Skipping spaces - while (true) { - // Read new bytes if needed - if (byteBuffer.position() >= byteBuffer.limit()) { - if (!fill(false)) {// parse header - // HEADER_VALUE_START - return HeaderParseStatus.NEED_MORE_DATA; - } - } - - chr = byteBuffer.get(); - if (chr != Constants.SP && chr != Constants.HT) { - headerParsePos = HeaderParsePosition.HEADER_VALUE; - byteBuffer.position(byteBuffer.position() - 1); - // Avoids prevChr = chr at start of header value - // parsing which causes problems when chr is CR - // (in the case of an empty header value) - chr = 0; - break; - } - } - } - if (headerParsePos == HeaderParsePosition.HEADER_VALUE) { - - // Reading bytes until the end of the line - boolean eol = false; - while (!eol) { - - // Read new bytes if needed - if (byteBuffer.position() >= byteBuffer.limit()) { - if (!fill(false)) {// parse header - // HEADER_VALUE - return HeaderParseStatus.NEED_MORE_DATA; - } - } - - prevChr = chr; - chr = byteBuffer.get(); - if (chr == Constants.CR && prevChr != Constants.CR) { - // CR is only permitted at the start of a CRLF sequence. - // Possible start of CRLF - process the next byte. - } else if (chr == Constants.LF) { - // CRLF or LF is an acceptable line terminator - eol = true; - } else if (prevChr == Constants.CR) { - // Invalid value - also need to delete header - return skipLine(); - } else if (HttpParser.isControl(chr) && chr != Constants.HT) { - // Invalid value - also need to delete header - return skipLine(); - } else if (chr == Constants.SP || chr == Constants.HT) { - byteBuffer.put(headerData.realPos, chr); - headerData.realPos++; - } else { - byteBuffer.put(headerData.realPos, chr); - headerData.realPos++; - headerData.lastSignificantChar = headerData.realPos; - } - } - - // Ignore whitespaces at the end of the line - headerData.realPos = headerData.lastSignificantChar; - - // Checking the first character of the new line. If the character - // is a LWS, then it's a multiline header - headerParsePos = HeaderParsePosition.HEADER_MULTI_LINE; - } - // Read new bytes if needed - if (byteBuffer.position() >= byteBuffer.limit()) { - if (!fill(false)) {// parse header - // HEADER_MULTI_LINE - return HeaderParseStatus.NEED_MORE_DATA; - } - } - - byte peek = byteBuffer.get(byteBuffer.position()); - if (headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) { - if (peek != Constants.SP && peek != Constants.HT) { - headerParsePos = HeaderParsePosition.HEADER_START; - break; - } else { - // Copying one extra space in the buffer (since there must - // be at least one space inserted between the lines) - byteBuffer.put(headerData.realPos, peek); - headerData.realPos++; - headerParsePos = HeaderParsePosition.HEADER_VALUE_START; - } - } - } - // Set the header value - headerData.headerValue.setBytes(byteBuffer.array(), headerData.start, - headerData.lastSignificantChar - headerData.start); - headerData.recycle(); - return HeaderParseStatus.HAVE_MORE_HEADERS; - } - - - private HeaderParseStatus skipLine() throws IOException { - // Parse the rest of the invalid header so we can construct a useful - // exception and/or debug message. - headerParsePos = HeaderParsePosition.HEADER_SKIPLINE; - boolean eol = false; - - // Reading bytes until the end of the line - while (!eol) { - - // Read new bytes if needed - if (byteBuffer.position() >= byteBuffer.limit()) { - if (!fill(false)) { - return HeaderParseStatus.NEED_MORE_DATA; - } - } - - int pos = byteBuffer.position(); - prevChr = chr; - chr = byteBuffer.get(); - if (chr == Constants.CR) { - // Skip - } else if (chr == Constants.LF) { - // CRLF or LF is an acceptable line terminator - eol = true; - } else { - headerData.lastSignificantChar = pos; - } - } - - throw new IllegalArgumentException( - sm.getString("iib.invalidheader.reject", HeaderUtil.toPrintableString(byteBuffer.array(), - headerData.lineStart, headerData.lastSignificantChar - headerData.lineStart + 1))); - } - - - // ----------------------------------------------------------- Inner classes - - private enum HeaderParseStatus { - DONE, - HAVE_MORE_HEADERS, - NEED_MORE_DATA - } - - - private enum HeaderParsePosition { - /** - * Start of a new header. A CRLF here means that there are no more headers. Any other character starts a header - * name. - */ - HEADER_START, - /** - * Reading a header name. All characters of header are HTTP_TOKEN_CHAR. Header name is followed by ':'. No - * whitespace is allowed.<br> - * Any non-HTTP_TOKEN_CHAR (this includes any whitespace) encountered before ':' will result in the whole line - * being ignored. - */ - HEADER_NAME, - /** - * Skipping whitespace before text of header value starts, either on the first line of header value (just after - * ':') or on subsequent lines when it is known that subsequent line starts with SP or HT. - */ - HEADER_VALUE_START, - /** - * Reading the header value. We are inside the value. Either on the first line or on any subsequent line. We - * come into this state from HEADER_VALUE_START after the first non-SP/non-HT byte is encountered on the line. - */ - HEADER_VALUE, - /** - * Before reading a new line of a header. Once the next byte is peeked, the state changes without advancing our - * position. The state becomes either HEADER_VALUE_START (if that first byte is SP or HT), or HEADER_START - * (otherwise). - */ - HEADER_MULTI_LINE, - /** - * Reading all bytes until the next CRLF. The line is being ignored. - */ - HEADER_SKIPLINE - } - - - private static class HeaderParseData { - /** - * The first character of the header line. - */ - int lineStart = 0; - /** - * When parsing header name: first character of the header.<br> - * When skipping broken header line: first character of the header.<br> - * When parsing header value: first character after ':'. - */ - int start = 0; - /** - * When parsing header name: not used (stays as 0).<br> - * When skipping broken header line: not used (stays as 0).<br> - * When parsing header value: starts as the first character after ':'. Then is increased as far as more bytes of - * the header are harvested. Bytes from buf[pos] are copied to buf[realPos]. Thus the string from [start] to - * [realPos-1] is the prepared value of the header, with whitespaces removed as needed.<br> - */ - int realPos = 0; - /** - * When parsing header name: not used (stays as 0).<br> - * When skipping broken header line: last non-CR/non-LF character.<br> - * When parsing header value: position after the last not-LWS character.<br> - */ - int lastSignificantChar = 0; - /** - * MB that will store the value of the header. It is null while parsing header name and is created after the - * name has been parsed. - */ - MessageBytes headerValue = null; - - public void recycle() { - lineStart = 0; - start = 0; - realPos = 0; - lastSignificantChar = 0; - headerValue = null; - } - } - - // ------------------------------------- InputStreamInputBuffer Inner Class /** @@ -1193,6 +868,12 @@ public class Http11InputBuffer implements InputBuffer, ApplicationBufferHandler } + @Override + public ByteBuffer getHeaderByteBuffer() { + return getByteBuffer(); + } + + @Override public void expand(int size) { if (byteBuffer.capacity() >= size) { diff --git a/java/org/apache/coyote/http11/LocalStrings.properties b/java/org/apache/coyote/http11/LocalStrings.properties index e42b425350..bb45718ebf 100644 --- a/java/org/apache/coyote/http11/LocalStrings.properties +++ b/java/org/apache/coyote/http11/LocalStrings.properties @@ -49,8 +49,6 @@ iib.filter.npe=You may not add a null filter iib.invalidHttpProtocol=Invalid character found in the HTTP protocol [{0}] iib.invalidPhase=Invalid request line parse phase [{0}] iib.invalidRequestTarget=Invalid character found in the request target [{0}]. The valid characters are defined in RFC 7230 and RFC 3986 -iib.invalidheader=The HTTP header line [{0}] does not conform to RFC 7230 and has been ignored. -iib.invalidheader.reject=The HTTP header line [{0}] does not conform to RFC 7230. The request has been rejected. iib.invalidmethod=Invalid character found in method name [{0}]. HTTP method names must be tokens iib.parseheaders.ise.error=Unexpected state: headers already parsed. Buffer not recycled? iib.readtimeout=Timeout attempting to read data from the socket diff --git a/java/org/apache/coyote/http11/LocalStrings_fr.properties b/java/org/apache/coyote/http11/LocalStrings_fr.properties index 1c419a6139..1dc15c7036 100644 --- a/java/org/apache/coyote/http11/LocalStrings_fr.properties +++ b/java/org/apache/coyote/http11/LocalStrings_fr.properties @@ -46,8 +46,6 @@ iib.filter.npe=Impossible d'ajouter un filtre null iib.invalidHttpProtocol=Un caractère invalide a été trouvé dans le protocole HTTP iib.invalidPhase=Etape invalide de traitement [{0}] de la ligne de requête iib.invalidRequestTarget=Un caractère invalide a été trouvé dans la cible de la requête, les caractères valides sont définis dans RFC 7230 et RFC 3986 -iib.invalidheader=La ligne d''en-être HTTP [{0}] n''est pas conforme à la RFC 7230 et a été ignorée -iib.invalidheader.reject=La ligne d''en-tête HTTP [{0}] ne respecte pas la RFC 7230. La requête a été rejetée. iib.invalidmethod=Caractère invalide trouvé dans le nom de méthode. Les noms HTTP doivent être des "token". iib.parseheaders.ise.error=Etat inattendu, les en-êtres ont déjà été traités, il est possible que le buffer n'ait pas été recyclé iib.readtimeout=Délai d'attente dépassé en lisant des données du socket diff --git a/java/org/apache/coyote/http11/LocalStrings_ja.properties b/java/org/apache/coyote/http11/LocalStrings_ja.properties index 1b29ac7586..5120c732fb 100644 --- a/java/org/apache/coyote/http11/LocalStrings_ja.properties +++ b/java/org/apache/coyote/http11/LocalStrings_ja.properties @@ -46,8 +46,6 @@ iib.filter.npe=Nullフィルタを追加することはできません。 iib.invalidHttpProtocol=HTTPプロトコルで無効な文字が見つかりました。 iib.invalidPhase=リクエスト行の解析フェーズ [{0}] は無効です iib.invalidRequestTarget=リクエストの宛先 [{0}] に無効な文字が含まれています。利用可能な文字は RFC 7230 および RFC 3986 に定義されています。 -iib.invalidheader=HTTP ヘッダー行 [{0}]は RFC 7230 に適合しないため無視します。 -iib.invalidheader.reject=HTTP ヘッダーの [{0}] 行目は RFC 7230 に準拠していません。リクエストは拒否されました。 iib.invalidmethod=HTTP メソッド名 [{0}] に無効な文字が含まれています。HTTP メソッド名は決められたトークンでなければなりません iib.parseheaders.ise.error=予期しない状態:ヘッダがすでに解析されています。バッファが未回収ですか? iib.readtimeout=ソケットからデータを読み取ろうとしている際のタイムアウト diff --git a/java/org/apache/coyote/http11/LocalStrings_ko.properties b/java/org/apache/coyote/http11/LocalStrings_ko.properties index aa601d72f8..c5c055dd80 100644 --- a/java/org/apache/coyote/http11/LocalStrings_ko.properties +++ b/java/org/apache/coyote/http11/LocalStrings_ko.properties @@ -46,7 +46,6 @@ iib.filter.npe=널인 필터를 추가할 수 없습니다. iib.invalidHttpProtocol=HTTP 프로토콜에서 유효하지 않은 문자가 발견되었습니다. iib.invalidPhase=파싱 국면 [{0}]에서, 유효하지 않은 HTTP 요청 라인 오류 iib.invalidRequestTarget=요청 타겟에서 유효하지 않은 문자가 발견되었습니다. 유효한 문자들은 RFC 7230과 RFC 3986에 정의되어 있습니다. -iib.invalidheader=HTTP 헤더 행 [{0}]이(가) RFC 7230을 준수하지 않아, 무시되었습니다. iib.invalidmethod=메소드 이름에 유효하지 않은 문자가 발견되었습니다. HTTP 메소드 이름은 유효한 토큰이어야 합니다. iib.parseheaders.ise.error=예기치 않은 상태: 헤더들이 이미 파싱되었습니다. 버퍼가 참조 해제되지 않았었나요? iib.readtimeout=소켓으로부터 데이터를 읽으려 시도하는 중 제한 시간 초과 diff --git a/java/org/apache/coyote/http11/LocalStrings_zh_CN.properties b/java/org/apache/coyote/http11/LocalStrings_zh_CN.properties index 176a16f35f..df24320472 100644 --- a/java/org/apache/coyote/http11/LocalStrings_zh_CN.properties +++ b/java/org/apache/coyote/http11/LocalStrings_zh_CN.properties @@ -46,7 +46,6 @@ iib.filter.npe=你不能添加空过滤器(null) iib.invalidHttpProtocol=在HTTP协议中发现无效字符[{0}] iib.invalidPhase=无效的请求行解析阶段[{0}] iib.invalidRequestTarget=在请求目标中找到无效字符[{0}]。有效字符在RFC 7230和RFC 3986中定义 -iib.invalidheader=HTTP header行 [{0}] 不符合RFC 7230并且已被忽略。 iib.invalidmethod=在方法名称[{0}]中发现无效的字符串, HTTP 方法名必须是有效的符号. iib.parseheaders.ise.error=意外状态:已解析标头。 缓冲池不回收? iib.readtimeout=从套接字读取数据超时 diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml index 044a6a19fc..d6bea23d29 100644 --- a/webapps/docs/changelog.xml +++ b/webapps/docs/changelog.xml @@ -186,6 +186,9 @@ <fix> Fix non-blocking reads of chunked request bodies. (markt) </fix> + <scode> + Refactor HTTP header parsing to use common parsing code. (markt) + </scode> </changelog> </subsection> <subsection name="Other"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org