On Fri, Jul 29, 2016 at 12:37 PM, <wr...@apache.org> wrote:

> Author: wrowe
> Date: Fri Jul 29 17:37:41 2016
> New Revision: 1754556
>
> URL: http://svn.apache.org/viewvc?rev=1754556&view=rev
> Log:
> Introduce ap_scan_http_token / ap_scan_http_field_content for a much
> more efficient pass through the header text; rather than reparsing
> the strings over and over under the HTTP_CONFORMANCE_STRICT fules.
>
> Improve logic and legibility by eliminating multiple repetitive tests
> of the STRICT flag, and simply reorder 'classic' behavior first and
> this new parser second to simplify the diff. Because of the whitespace
> change (which I had wished to dodge), reading this --ignore-all-space
> is a whole lot easier. Particularly against 2.4.x branch, which is now
> identical in the 'classic' logic flow. Both of which I'll share with dev@
>

That diff to 2.4.x reads as;

--- ../../httpd-2.4/server/protocol.c 2016-07-29 12:05:52.560891394 -0500
+++ protocol.c 2016-07-29 12:17:29.333717519 -0500
@@ -825,6 +900,10 @@
                     return;
                 }

+                if (!(conf->http_conformance &
AP_HTTP_CONFORMANCE_STRICT)) {
+                {
+                    /* Not Strict, using the legacy parser */
+
                 if (!(value = strchr(last_field, ':'))) { /* Find ':' or
 */
                     r->status = HTTP_BAD_REQUEST;      /* abort bad
request */
                     apr_table_setn(r->notes, "error-notes",
@@ -862,6 +941,64 @@
                        && (*tmp_field == ' ' || *tmp_field == '\t')) {
                     *tmp_field-- = '\0';
                 }
+                }
+                else /* Using strict RFC7230 parsing */
+                {
+                    /* Ensure valid token chars before ':' per RFC 7230
3.2.4 */
+                    if (!(value = (char *)ap_scan_http_token(last_field))
+                            || *value != ':') {
+                        r->status = HTTP_BAD_REQUEST;
+                        apr_table_setn(r->notes, "error-notes",
+                            apr_psprintf(r->pool,
+                                         "Request header field name "
+                                         "is malformed.<br />\n"
+                                         "<pre>\n%.*s</pre>\n",
+                                         (int)LOG_NAME_MAX_LEN,
+                                         ap_escape_html(r->pool,
last_field)));
+                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
APLOGNO(02426)
+                                      "Request header field name is
malformed: "
+                                      "%.*s", (int)LOG_NAME_MAX_LEN,
last_field);
+                        return;
+                    }
+
+                    *value++ = '\0'; /* NUL-terminate last_field name at
':' */
+
+                    while (*value == ' ' || *value == '\t') {
+                        ++value;     /* Skip LWS of value */
+                    }
+
+                    /* Find invalid, non-HT ctrl char, or the trailing
NULL */
+                    tmp_field = (char *)ap_scan_http_field_content(value);
+
+                    /* Strip LWS after field-value, if string not empty */
+                    if (*value && (*tmp_field == '\0')) {
+                        tmp_field--;
+                        while (*tmp_field == ' ' || *tmp_field == '\t') {
+                            *tmp_field-- = '\0';
+                        }
+                        ++tmp_field;
+                    }
+
+                    /* Reject value for all garbage input (CTRLs excluding
HT)
+                     * e.g. only VCHAR / SP / HT / obs-text are allowed per
+                     * RFC7230 3.2.6 - leave all more explicit rule
enforcement
+                     * for specific header handler logic later in the cycle
+                     */
+                    if (*tmp_field != '\0') {
+                        r->status = HTTP_BAD_REQUEST;
+                        apr_table_setn(r->notes, "error-notes",
+                            apr_psprintf(r->pool,
+                                         "Request header value "
+                                         "is malformed.<br />\n"
+                                         "<pre>\n%.*s</pre>\n",
+                                         (int)LOG_NAME_MAX_LEN,
+                                         ap_escape_html(r->pool, value)));
+                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
APLOGNO(02427)
+                                      "Request header value is malformed: "
+                                      "%.*s", (int)LOG_NAME_MAX_LEN,
value);
+                        return;
+                    }
+                }

                 apr_table_addn(r->headers_in, last_field, value);


while the diff to -rPREV on trunk as --ignore-all-space reads as;

--- protocol.c.orig 2016-07-29 12:16:59.453082289 -0500
+++ protocol.c 2016-07-29 12:17:29.333717519 -0500
@@ -900,6 +900,10 @@
                     return;
                 }

+                if (!(conf->http_conformance &
AP_HTTP_CONFORMANCE_STRICT)) {
+                {
+                    /* Not Strict, using the legacy parser */
+
                 if (!(value = strchr(last_field, ':'))) { /* Find ':' or
 */
                     r->status = HTTP_BAD_REQUEST;      /* abort bad
request */
                     apr_table_setn(r->notes, "error-notes",
@@ -937,34 +941,65 @@
                        && (*tmp_field == ' ' || *tmp_field == '\t')) {
                     *tmp_field-- = '\0';
                 }
+                }
+                else /* Using strict RFC7230 parsing */
+                {
+                    /* Ensure valid token chars before ':' per RFC 7230
3.2.4 */
+                    if (!(value = (char *)ap_scan_http_token(last_field))
+                            || *value != ':') {
+                        r->status = HTTP_BAD_REQUEST;
+                        apr_table_setn(r->notes, "error-notes",
+                            apr_psprintf(r->pool,
+                                         "Request header field name "
+                                         "is malformed.<br />\n"
+                                         "<pre>\n%.*s</pre>\n",
+                                         (int)LOG_NAME_MAX_LEN,
+                                         ap_escape_html(r->pool,
last_field)));
+                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
APLOGNO(02426)
+                                      "Request header field name is
malformed: "
+                                      "%.*s", (int)LOG_NAME_MAX_LEN,
last_field);
+                        return;
+                    }

-                if (conf->http_conformance & AP_HTTP_CONFORMANCE_STRICT) {
-                    int err = 0;
+                    *value++ = '\0'; /* NUL-terminate last_field name at
':' */

-                    if (*last_field == '\0') {
-                        err = HTTP_BAD_REQUEST;
-                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
APLOGNO(02425)
-                                      "Empty request header field name not
allowed");
+                    while (*value == ' ' || *value == '\t') {
+                        ++value;     /* Skip LWS of value */
                     }
-                    else if (ap_has_cntrl(last_field)) {
-                        err = HTTP_BAD_REQUEST;
-                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
APLOGNO(02426)
-                                      "[HTTP strict] Request header field
name contains "
-                                      "control character: %.*s",
-                                      (int)LOG_NAME_MAX_LEN, last_field);
+
+                    /* Find invalid, non-HT ctrl char, or the trailing
NULL */
+                    tmp_field = (char *)ap_scan_http_field_content(value);
+
+                    /* Strip LWS after field-value, if string not empty */
+                    if (*value && (*tmp_field == '\0')) {
+                        tmp_field--;
+                        while (*tmp_field == ' ' || *tmp_field == '\t') {
+                            *tmp_field-- = '\0';
                     }
-                    else if (ap_has_cntrl(value)) {
-                        err = HTTP_BAD_REQUEST;
-                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
APLOGNO(02427)
-                                      "Request header field '%.*s'
contains "
-                                      "control character",
(int)LOG_NAME_MAX_LEN,
-                                      last_field);
+                        ++tmp_field;
                     }
-                    if (err && !(conf->http_conformance &
AP_HTTP_CONFORMANCE_LOGONLY)) {
-                        r->status = err;
+
+                    /* Reject value for all garbage input (CTRLs excluding
HT)
+                     * e.g. only VCHAR / SP / HT / obs-text are allowed per
+                     * RFC7230 3.2.6 - leave all more explicit rule
enforcement
+                     * for specific header handler logic later in the cycle
+                     */
+                    if (*tmp_field != '\0') {
+                        r->status = HTTP_BAD_REQUEST;
+                        apr_table_setn(r->notes, "error-notes",
+                            apr_psprintf(r->pool,
+                                         "Request header value "
+                                         "is malformed.<br />\n"
+                                         "<pre>\n%.*s</pre>\n",
+                                         (int)LOG_NAME_MAX_LEN,
+                                         ap_escape_html(r->pool, value)));
+                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
APLOGNO(02427)
+                                      "Request header value is malformed: "
+                                      "%.*s", (int)LOG_NAME_MAX_LEN,
value);
                         return;
                     }
                 }
+
                 apr_table_addn(r->headers_in, last_field, value);

                 /* reset the alloc_len so that we'll allocate a new

Reply via email to