This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tomcat.git
commit 3443bffb40c22b2c107e277575e02f9f4e688e3d Author: Mark Thomas <ma...@apache.org> AuthorDate: Fri Apr 26 15:47:23 2024 +0100 Add a common header parser for headers and trailers --- .../tomcat/util/http/parser/HttpHeaderParser.java | 409 +++++++++++++++++++++ .../util/http/parser/LocalStrings.properties | 3 + .../util/http/parser/LocalStrings_fr.properties | 2 + .../util/http/parser/LocalStrings_ja.properties | 2 + 4 files changed, 416 insertions(+) diff --git a/java/org/apache/tomcat/util/http/parser/HttpHeaderParser.java b/java/org/apache/tomcat/util/http/parser/HttpHeaderParser.java new file mode 100644 index 0000000000..7ef3b8b5ee --- /dev/null +++ b/java/org/apache/tomcat/util/http/parser/HttpHeaderParser.java @@ -0,0 +1,409 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.util.http.parser; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.tomcat.util.buf.MessageBytes; +import org.apache.tomcat.util.http.HeaderUtil; +import org.apache.tomcat.util.http.MimeHeaders; +import org.apache.tomcat.util.res.StringManager; + +public class HttpHeaderParser { + + private static final StringManager sm = StringManager.getManager(HttpHeaderParser.class); + + private static final byte CR = (byte) '\r'; + private static final byte LF = (byte) '\n'; + private static final byte SP = (byte) ' '; + private static final byte HT = (byte) '\t'; + private static final byte COLON = (byte) ':'; + private static final byte A = (byte) 'A'; + private static final byte a = (byte) 'a'; + private static final byte Z = (byte) 'Z'; + private static final byte LC_OFFSET = A - a; + + private final HeaderDataSource source; + private final MimeHeaders headers; + private final boolean tolerantEol; + private final HeaderParseData headerData = new HeaderParseData(); + + private HeaderParsePosition headerParsePos = HeaderParsePosition.HEADER_START; + private byte prevChr = 0; + private byte chr = 0; + + + public HttpHeaderParser(HeaderDataSource source, MimeHeaders headers, boolean tolerantEol) { + this.source = source; + this.headers = headers; + this.tolerantEol = tolerantEol; + } + + + public void recycle() { + chr = 0; + prevChr = 0; + headerParsePos = HeaderParsePosition.HEADER_START; + headerData.recycle(); + } + + + /** + * Parse an HTTP header. + * + * @return One of {@link HeaderParseStatus#NEED_MORE_DATA}, {@link HeaderParseStatus#HAVE_MORE_HEADERS} or + * {@link HeaderParseStatus#DONE}. + * + * @throws IOException If an error occurs during the parsing of the headers + */ + public HeaderParseStatus parseHeader() throws IOException { + + while (headerParsePos == HeaderParsePosition.HEADER_START) { + + // Read new bytes if needed + if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) { + if (!source.fillHeaderBuffer()) { + return HeaderParseStatus.NEED_MORE_DATA; + } + } + + prevChr = chr; + chr = source.getHeaderByteBuffer().get(); + + if (chr == CR && prevChr != CR) { + // Possible start of CRLF - process the next byte. + } else if (chr == LF) { + if (!tolerantEol && prevChr != CR) { + throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR")); + } + return HeaderParseStatus.DONE; + } else { + if (prevChr == CR) { + // Must have read two bytes (first was CR, second was not LF) + source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 2); + } else { + // Must have only read one byte + source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1); + } + break; + } + } + + if (headerParsePos == HeaderParsePosition.HEADER_START) { + // Mark the current buffer position + headerData.start = source.getHeaderByteBuffer().position(); + headerData.lineStart = headerData.start; + headerParsePos = HeaderParsePosition.HEADER_NAME; + } + + // + // Reading the header name + // Header name is always US-ASCII + // + + while (headerParsePos == HeaderParsePosition.HEADER_NAME) { + + // Read new bytes if needed + if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) { + if (!source.fillHeaderBuffer()) { + return HeaderParseStatus.NEED_MORE_DATA; + } + } + + int pos = source.getHeaderByteBuffer().position(); + chr = source.getHeaderByteBuffer().get(); + if (chr == COLON) { + if (headerData.start == pos) { + // Zero length header name - not valid. + // skipLine() will handle the error + return skipLine(); + } + headerParsePos = HeaderParsePosition.HEADER_VALUE_START; + headerData.headerValue = headers.addValue(source.getHeaderByteBuffer().array(), headerData.start, + pos - headerData.start); + pos = source.getHeaderByteBuffer().position(); + // Mark the current buffer position + headerData.start = pos; + headerData.realPos = pos; + headerData.lastSignificantChar = pos; + break; + } else if (!HttpParser.isToken(chr)) { + // Non-token characters are illegal in header names + // Parsing continues so the error can be reported in context + headerData.lastSignificantChar = pos; + source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1); + // skipLine() will handle the error + return skipLine(); + } + + // chr is next byte of header name. Convert to lowercase. + if (chr >= A && chr <= Z) { + source.getHeaderByteBuffer().put(pos, (byte) (chr - LC_OFFSET)); + } + } + + // Skip the line and ignore the header + if (headerParsePos == HeaderParsePosition.HEADER_SKIPLINE) { + return skipLine(); + } + + // + // Reading the header value (which can be spanned over multiple lines) + // + + while (headerParsePos == HeaderParsePosition.HEADER_VALUE_START || + headerParsePos == HeaderParsePosition.HEADER_VALUE || + headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) { + + if (headerParsePos == HeaderParsePosition.HEADER_VALUE_START) { + // Skipping spaces + while (true) { + // Read new bytes if needed + if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) { + if (!source.fillHeaderBuffer()) { + return HeaderParseStatus.NEED_MORE_DATA; + } + } + + chr = source.getHeaderByteBuffer().get(); + if (chr != SP && chr != HT) { + headerParsePos = HeaderParsePosition.HEADER_VALUE; + source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1); + // Avoids prevChr = chr at start of header value + // parsing which causes problems when chr is CR + // (in the case of an empty header value) + chr = 0; + break; + } + } + } + if (headerParsePos == HeaderParsePosition.HEADER_VALUE) { + + // Reading bytes until the end of the line + boolean eol = false; + while (!eol) { + + // Read new bytes if needed + if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) { + if (!source.fillHeaderBuffer()) { + return HeaderParseStatus.NEED_MORE_DATA; + } + } + + prevChr = chr; + chr = source.getHeaderByteBuffer().get(); + if (chr == CR && prevChr != CR) { + // CR is only permitted at the start of a CRLF sequence. + // Possible start of CRLF - process the next byte. + } else if (chr == LF) { + if (!tolerantEol && prevChr != CR) { + throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR")); + } + eol = true; + } else if (prevChr == CR) { + // Invalid value - also need to delete header + return skipLine(); + } else if (HttpParser.isControl(chr) && chr != HT) { + // Invalid value - also need to delete header + return skipLine(); + } else if (chr == SP || chr == HT) { + source.getHeaderByteBuffer().put(headerData.realPos, chr); + headerData.realPos++; + } else { + source.getHeaderByteBuffer().put(headerData.realPos, chr); + headerData.realPos++; + headerData.lastSignificantChar = headerData.realPos; + } + } + + // Ignore whitespaces at the end of the line + headerData.realPos = headerData.lastSignificantChar; + + // Checking the first character of the new line. If the character + // is a LWS, then it's a multiline header + headerParsePos = HeaderParsePosition.HEADER_MULTI_LINE; + } + // Read new bytes if needed + if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) { + if (!source.fillHeaderBuffer()) { + return HeaderParseStatus.NEED_MORE_DATA; + } + } + + byte peek = source.getHeaderByteBuffer().get(source.getHeaderByteBuffer().position()); + if (headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) { + if (peek != SP && peek != HT) { + headerParsePos = HeaderParsePosition.HEADER_START; + break; + } else { + // Copying one extra space in the buffer (since there must + // be at least one space inserted between the lines) + source.getHeaderByteBuffer().put(headerData.realPos, peek); + headerData.realPos++; + headerParsePos = HeaderParsePosition.HEADER_VALUE_START; + } + } + } + // Set the header value + headerData.headerValue.setBytes(source.getHeaderByteBuffer().array(), headerData.start, + headerData.lastSignificantChar - headerData.start); + headerData.recycle(); + return HeaderParseStatus.HAVE_MORE_HEADERS; + } + + + private HeaderParseStatus skipLine() throws IOException { + // Parse the rest of the invalid header so we can construct a useful + // exception and/or debug message. + headerParsePos = HeaderParsePosition.HEADER_SKIPLINE; + boolean eol = false; + + // Reading bytes until the end of the line + while (!eol) { + + // Read new bytes if needed + if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) { + if (!source.fillHeaderBuffer()) { + return HeaderParseStatus.NEED_MORE_DATA; + } + } + + int pos = source.getHeaderByteBuffer().position(); + prevChr = chr; + chr = source.getHeaderByteBuffer().get(); + if (chr == CR) { + // Skip + } else if (chr == LF) { + if (!tolerantEol && prevChr != CR) { + throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR")); + } + eol = true; + } else { + headerData.lastSignificantChar = pos; + } + } + + throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidHeader", + HeaderUtil.toPrintableString(source.getHeaderByteBuffer().array(), headerData.lineStart, + headerData.lastSignificantChar - headerData.lineStart + 1))); + } + + + public enum HeaderParseStatus { + DONE, + HAVE_MORE_HEADERS, + NEED_MORE_DATA + } + + + public enum HeaderParsePosition { + /** + * Start of a new header. A CRLF here means that there are no more headers. Any other character starts a header + * name. + */ + HEADER_START, + /** + * Reading a header name. All characters of header are HTTP_TOKEN_CHAR. Header name is followed by ':'. No + * whitespace is allowed.<br> + * Any non-HTTP_TOKEN_CHAR (this includes any whitespace) encountered before ':' will result in the whole line + * being ignored. + */ + HEADER_NAME, + /** + * Skipping whitespace before text of header value starts, either on the first line of header value (just after + * ':') or on subsequent lines when it is known that subsequent line starts with SP or HT. + */ + HEADER_VALUE_START, + /** + * Reading the header value. We are inside the value. Either on the first line or on any subsequent line. We + * come into this state from HEADER_VALUE_START after the first non-SP/non-HT byte is encountered on the line. + */ + HEADER_VALUE, + /** + * Before reading a new line of a header. Once the next byte is peeked, the state changes without advancing our + * position. The state becomes either HEADER_VALUE_START (if that first byte is SP or HT), or HEADER_START + * (otherwise). + */ + HEADER_MULTI_LINE, + /** + * Reading all bytes until the next CRLF. The line is being ignored. + */ + HEADER_SKIPLINE + } + + + private static class HeaderParseData { + /** + * The first character of the header line. + */ + int lineStart = 0; + /** + * When parsing header name: first character of the header.<br> + * When skipping broken header line: first character of the header.<br> + * When parsing header value: first character after ':'. + */ + int start = 0; + /** + * When parsing header name: not used (stays as 0).<br> + * When skipping broken header line: not used (stays as 0).<br> + * When parsing header value: starts as the first character after ':'. Then is increased as far as more bytes of + * the header are harvested. Bytes from buf[pos] are copied to buf[realPos]. Thus the string from [start] to + * [realPos-1] is the prepared value of the header, with whitespaces removed as needed.<br> + */ + int realPos = 0; + /** + * When parsing header name: not used (stays as 0).<br> + * When skipping broken header line: last non-CR/non-LF character.<br> + * When parsing header value: position after the last not-LWS character.<br> + */ + int lastSignificantChar = 0; + /** + * MB that will store the value of the header. It is null while parsing header name and is created after the + * name has been parsed. + */ + MessageBytes headerValue = null; + + public void recycle() { + lineStart = 0; + start = 0; + realPos = 0; + lastSignificantChar = 0; + headerValue = null; + } + } + + + public interface HeaderDataSource { + /** + * Read more data into the header buffer. The implementation is expected to determine if blocking or not + * blocking IO should be used. + * + * @return {@code true} if more data was added to the buffer, otherwise {@code false} + * + * @throws IOException If an I/O error occurred while obtaining more header data + */ + boolean fillHeaderBuffer() throws IOException; + + /** + * Obtain a reference to the buffer containing the header data. + * + * @return The buffer containing the header data + */ + ByteBuffer getHeaderByteBuffer(); + } +} diff --git a/java/org/apache/tomcat/util/http/parser/LocalStrings.properties b/java/org/apache/tomcat/util/http/parser/LocalStrings.properties index 325e7da57f..07238a9424 100644 --- a/java/org/apache/tomcat/util/http/parser/LocalStrings.properties +++ b/java/org/apache/tomcat/util/http/parser/LocalStrings.properties @@ -45,6 +45,9 @@ http.tooManyColons=An IPv6 address may not contain more than 2 sequential colon http.tooManyDoubleColons=An IPv6 address may only contain a single '::' sequence. http.tooManyHextets=The IPv6 address contains [{0}] hextets but a valid IPv6 address may not have more than 8. +httpHeaderParser.invalidCrlfNoCR=Invalid end of line sequence (No CR before LF) +httpHeaderParser.invalidHeader=The HTTP header line [{0}] does not conform to RFC 9112. The request has been rejected. + sf.bareitem.invalidCharacter=The invalid character [{0}] was found parsing when start of a bare item sf.base64.invalidCharacter=The [{0}] character is not valid inside a base64 sequence sf.boolean.invalidCharacter=The [{0}] character is not a valid boolean value diff --git a/java/org/apache/tomcat/util/http/parser/LocalStrings_fr.properties b/java/org/apache/tomcat/util/http/parser/LocalStrings_fr.properties index a954343891..a1be4a83bd 100644 --- a/java/org/apache/tomcat/util/http/parser/LocalStrings_fr.properties +++ b/java/org/apache/tomcat/util/http/parser/LocalStrings_fr.properties @@ -45,6 +45,8 @@ http.tooManyColons=Une adresse IPv6 ne peut pas contenir plus de deux caractère http.tooManyDoubleColons=Une adresse IPv6 ne peut contenir qu'une seule séquence "::" http.tooManyHextets=L''adresse IPv6 contient [{0}] groupes de 4 octets mais une adresse IPv6 valide ne doit pas en avoir plus de 8 +httpHeaderParser.invalidHeader=La ligne d''en-t�te HTTP [{0}] ne respecte pas la RFC 7230. La requ�te a �t� rejet�e. + sf.bareitem.invalidCharacter=Le caractère [{0}] invalide a été rencontré en début d''un objet sf.base64.invalidCharacter=Le caractère [{0}] est invalide dans une séquence base64 sf.boolean.invalidCharacter=Le caractère [{0}] n''est pas une valeur booléene valide diff --git a/java/org/apache/tomcat/util/http/parser/LocalStrings_ja.properties b/java/org/apache/tomcat/util/http/parser/LocalStrings_ja.properties index b9bf412a57..abde689744 100644 --- a/java/org/apache/tomcat/util/http/parser/LocalStrings_ja.properties +++ b/java/org/apache/tomcat/util/http/parser/LocalStrings_ja.properties @@ -45,6 +45,8 @@ http.tooManyColons=IPv6 アドレスでは文字 : を 2 つ以上連続する http.tooManyDoubleColons=IPv6アドレスは単一の '::'シーケンスのみを含むことができます。 http.tooManyHextets=IPv6 アドレスは [{0}] ヘクステットで構成されていますが、正常な IPv6 アドレスなら 8 ヘクステット以上になりません。 +httpHeaderParser.invalidHeader=HTTP ヘッダーの [{0}] 行目は RFC 7230 に準拠していません。リクエストは拒否されました。 + sf.bareitem.invalidCharacter=ベアアイテムの開始を解析中に無効な文字 [{0}] が見つかりました sf.base64.invalidCharacter=文字 [{0}] は base64 シーケンス内では無効です sf.boolean.invalidCharacter=文字 [{0}] は有効なブール値ではありません --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org