This is an automated email from the ASF dual-hosted git repository. amc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push: new 0be4e73 MIMEScanner: Make MIMEScanner a class, not a POD with free functions. 0be4e73 is described below commit 0be4e7326b89670dea1ffe4c7f21c36bfd2bdf59 Author: Alan M. Carroll <a...@apache.org> AuthorDate: Wed Feb 6 11:10:22 2019 -0600 MIMEScanner: Make MIMEScanner a class, not a POD with free functions. --- proxy/hdrs/HTTP.cc | 27 +-- proxy/hdrs/HdrTest.cc | 79 --------- proxy/hdrs/HdrTest.h | 1 - proxy/hdrs/MIME.cc | 303 ++++++++++++-------------------- proxy/hdrs/MIME.h | 82 +++++++-- proxy/hdrs/Makefile.am | 1 + proxy/hdrs/unit_tests/test_Hdrs.cc | 86 +++++++++ proxy/hdrs/unit_tests/unit_test_main.cc | 21 ++- 8 files changed, 305 insertions(+), 295 deletions(-) diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc index 41bf41b..eebf4ad 100644 --- a/proxy/hdrs/HTTP.cc +++ b/proxy/hdrs/HTTP.cc @@ -894,17 +894,21 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const const char *version_start; const char *version_end; + ts::TextView text, parsed; + real_end = end; start: hh->m_polarity = HTTP_TYPE_REQUEST; // Make sure the line is not longer than 64K - if (scanner->m_line_length >= UINT16_MAX) { + if (scanner->get_buffered_line_size() >= UINT16_MAX) { return PARSE_RESULT_ERROR; } - err = mime_scanner_get(scanner, start, real_end, &line_start, &end, &line_is_real, eof, MIME_SCANNER_TYPE_LINE); + text.assign(*start, real_end); + err = scanner->get(text, parsed, line_is_real, eof, MIMEScanner::LINE); + *start = text.data(); if (err < 0) { return err; } @@ -917,9 +921,9 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const return err; } - cur = line_start; - ink_assert((end - cur) >= 0); - ink_assert((end - cur) < UINT16_MAX); + ink_assert(parsed.size() < UINT16_MAX); + line_start = cur = parsed.data(); + end = parsed.data_end(); must_copy_strings = (must_copy_strings || (!line_is_real)); @@ -1244,11 +1248,14 @@ http_parser_parse_resp(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const hh->m_polarity = HTTP_TYPE_RESPONSE; // Make sure the line is not longer than 64K - if (scanner->m_line_length >= UINT16_MAX) { + if (scanner->get_buffered_line_size() >= UINT16_MAX) { return PARSE_RESULT_ERROR; } - err = mime_scanner_get(scanner, start, real_end, &line_start, &end, &line_is_real, eof, MIME_SCANNER_TYPE_LINE); + ts::TextView text{*start, real_end}; + ts::TextView parsed; + err = scanner->get(text, parsed, line_is_real, eof, MIMEScanner::LINE); + *start = text.data(); if (err < 0) { return err; } @@ -1256,9 +1263,9 @@ http_parser_parse_resp(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const return err; } - cur = line_start; - ink_assert((end - cur) >= 0); - ink_assert((end - cur) < UINT16_MAX); + ink_assert(parsed.size() < UINT16_MAX); + line_start = cur = parsed.data(); + end = parsed.data_end(); must_copy_strings = (must_copy_strings || (!line_is_real)); diff --git a/proxy/hdrs/HdrTest.cc b/proxy/hdrs/HdrTest.cc index ee3f93d..875e4e1 100644 --- a/proxy/hdrs/HdrTest.cc +++ b/proxy/hdrs/HdrTest.cc @@ -74,7 +74,6 @@ HdrTest::go(RegressionTest *t, int /* atype ATS_UNUSED */) status = status & test_url(); status = status & test_arena(); status = status & test_regex(); - status = status & test_http_parser_eos_boundary_cases(); status = status & test_http_mutation(); status = status & test_mime(); status = status & test_http(); @@ -521,84 +520,6 @@ HdrTest::test_mime() -------------------------------------------------------------------------*/ int -HdrTest::test_http_parser_eos_boundary_cases() -{ - struct { - const char *msg; - int expected_result; - int expected_bytes_consumed; - } tests[] = { - {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26}, - {"GET /index.html HTTP/1.0\r\n\r\n***BODY****", PARSE_RESULT_DONE, 28}, - {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n***BODY****", PARSE_RESULT_DONE, 48}, - {"GET", PARSE_RESULT_ERROR, 3}, - {"GET /index.html", PARSE_RESULT_ERROR, 15}, - {"GET /index.html\r\n", PARSE_RESULT_ERROR, 17}, - {"GET /index.html HTTP/1.0", PARSE_RESULT_ERROR, 24}, - {"GET /index.html HTTP/1.0\r", PARSE_RESULT_ERROR, 25}, - {"GET /index.html HTTP/1.0\n", PARSE_RESULT_DONE, 25}, - {"GET /index.html HTTP/1.0\n\n", PARSE_RESULT_DONE, 26}, - {"GET /index.html HTTP/1.0\r\n\r\n", PARSE_RESULT_DONE, 28}, - {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar", PARSE_RESULT_ERROR, 44}, - {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 45}, - {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE, 46}, - {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n", PARSE_RESULT_DONE, 48}, - {"GET /index.html HTTP/1.0\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 44}, - {"GET /index.html HTTP/1.0\nUser-Agent: foobar\nBoo: foo\n", PARSE_RESULT_DONE, 53}, - {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE, 46}, - {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26}, - {"", PARSE_RESULT_ERROR, 0}, - {nullptr, 0, 0}, - }; - - int i, ret, bytes_consumed; - const char *orig_start; - const char *start; - const char *end; - HTTPParser parser; - - int failures = 0; - - bri_box("test_http_parser_eos_boundary_cases"); - - http_parser_init(&parser); - - for (i = 0; tests[i].msg != nullptr; i++) { - HTTPHdr req_hdr; - - start = tests[i].msg; - end = start + strlen(start); // 1 character past end of string - - req_hdr.create(HTTP_TYPE_REQUEST); - - http_parser_clear(&parser); - - orig_start = start; - ret = req_hdr.parse_req(&parser, &start, end, true); - bytes_consumed = (int)(start - orig_start); - - printf("======== test %d (length=%d, consumed=%d)\n", i, (int)strlen(tests[i].msg), bytes_consumed); - printf("[%s]\n", tests[i].msg); - printf("\n["); - req_hdr.print(nullptr, 0, nullptr, nullptr); - printf("]\n\n"); - - if ((ret != tests[i].expected_result) || (bytes_consumed != tests[i].expected_bytes_consumed)) { - ++failures; - printf("FAILED: test %d: retval <expected %d, got %d>, eaten <expected %d, got %d>\n\n", i, tests[i].expected_result, ret, - tests[i].expected_bytes_consumed, bytes_consumed); - } - - req_hdr.destroy(); - } - - return (failures_to_status("test_http_parser_eos_boundary_cases", failures)); -} - -/*------------------------------------------------------------------------- - -------------------------------------------------------------------------*/ - -int HdrTest::test_http_aux(const char *request, const char *response) { int err; diff --git a/proxy/hdrs/HdrTest.h b/proxy/hdrs/HdrTest.h index 91a979c..7f8dc83 100644 --- a/proxy/hdrs/HdrTest.h +++ b/proxy/hdrs/HdrTest.h @@ -51,7 +51,6 @@ private: int test_parse_date(); int test_format_date(); int test_url(); - int test_http_parser_eos_boundary_cases(); int test_arena(); int test_regex(); int test_accept_language_match(); diff --git a/proxy/hdrs/MIME.cc b/proxy/hdrs/MIME.cc index 81c4622..d130b20 100644 --- a/proxy/hdrs/MIME.cc +++ b/proxy/hdrs/MIME.cc @@ -34,6 +34,8 @@ #include "HdrUtils.h" #include "HttpCompat.h" +using ts::TextView; + /*********************************************************************** * * * C O M P I L E O P T I O N S * @@ -2302,215 +2304,161 @@ MIMEHdr::get_host_port_values(const char **host_ptr, ///< Pointer to host. * P A R S E R * * * ***********************************************************************/ -void -_mime_scanner_init(MIMEScanner *scanner) -{ - scanner->m_line = nullptr; - scanner->m_line_size = 0; - scanner->m_line_length = 0; - scanner->m_state = MIME_PARSE_BEFORE; -} -////////////////////////////////////////////////////// -// init first time structure setup // -// clear resets an already-initialized structure // -////////////////////////////////////////////////////// void -mime_scanner_init(MIMEScanner *scanner) +MIMEScanner::init() { - _mime_scanner_init(scanner); + m_state = INITIAL_PARSE_STATE; + // Ugly, but required because of how proxy allocation works - that leaves the instance in a + // random state, so even assigning to it can crash. Because this method substitutes for a real + // constructor in the proxy allocation system, call the CTOR here. Any memory that gets allocated + // is supposed to be cleaned up by calling @c clear on this object. + new (&m_line) std::string; } -// clear is to reset an already initialized structure -void -mime_scanner_clear(MIMEScanner *scanner) +MIMEScanner & +MIMEScanner::append(TextView text) { - ats_free(scanner->m_line); - _mime_scanner_init(scanner); -} - -void -mime_scanner_append(MIMEScanner *scanner, const char *data, int data_size) -{ - int free_size = scanner->m_line_size - scanner->m_line_length; - - ////////////////////////////////////////////////////// - // if not enough space, allocate or grow the buffer // - ////////////////////////////////////////////////////// - if (data_size > free_size) { // need to allocate/grow the buffer - if (scanner->m_line_size == 0) { // buffer should be at least 128 bytes - scanner->m_line_size = 128; - } - - while (free_size < data_size) { // grow buffer by powers of 2 - scanner->m_line_size *= 2; - free_size = scanner->m_line_size - scanner->m_line_length; - } - - if (scanner->m_line == nullptr) { // if no buffer yet, allocate one - scanner->m_line = (char *)ats_malloc(scanner->m_line_size); - } else { - scanner->m_line = (char *)ats_realloc(scanner->m_line, scanner->m_line_size); - } - } - //////////////////////////////////////////////// - // append new data onto the end of the buffer // - //////////////////////////////////////////////// - - memcpy(&(scanner->m_line[scanner->m_line_length]), data, data_size); - scanner->m_line_length += data_size; + m_line += text; + return *this; } ParseResult -mime_scanner_get(MIMEScanner *S, const char **raw_input_s, const char *raw_input_e, const char **output_s, const char **output_e, - bool *output_shares_raw_input, - bool raw_input_eof, ///< All data has been received for this header. - int raw_input_scan_type) +MIMEScanner::get(TextView &input, TextView &output, bool &output_shares_input, bool eof_p, ScanType scan_type) { - const char *raw_input_c, *lf_ptr; ParseResult zret = PARSE_RESULT_CONT; // Need this for handling dangling CR. - static const char RAW_CR = ParseRules::CHAR_CR; - - ink_assert((raw_input_s != nullptr) && (*raw_input_s != nullptr)); - ink_assert(raw_input_e != nullptr); + static const char RAW_CR{ParseRules::CHAR_CR}; - raw_input_c = *raw_input_s; - - while (PARSE_RESULT_CONT == zret && raw_input_c < raw_input_e) { - ptrdiff_t runway = raw_input_e - raw_input_c; // remaining input. - switch (S->m_state) { + auto text = input; + while (PARSE_RESULT_CONT == zret && !text.empty()) { + switch (m_state) { case MIME_PARSE_BEFORE: // waiting to find a field. - if (ParseRules::is_cr(*raw_input_c)) { - ++raw_input_c; - if (runway >= 2 && ParseRules::is_lf(*raw_input_c)) { + if (ParseRules::is_cr(*text)) { + ++text; + if (!text.empty() && ParseRules::is_lf(*text)) { // optimize a bit - this happens >99% of the time after a CR. - ++raw_input_c; + ++text; zret = PARSE_RESULT_DONE; } else { - S->m_state = MIME_PARSE_FOUND_CR; + m_state = MIME_PARSE_FOUND_CR; } - } else if (ParseRules::is_lf(*raw_input_c)) { - ++raw_input_c; + } else if (ParseRules::is_lf(*text)) { + ++text; zret = PARSE_RESULT_DONE; // Required by regression test. } else { // consume this character in the next state. - S->m_state = MIME_PARSE_INSIDE; + m_state = MIME_PARSE_INSIDE; } break; case MIME_PARSE_FOUND_CR: - // Looking for a field and found a CR, which should mean terminating - // the header. Note that we've left the CR in the input so we have - // to skip over it. - if (ParseRules::is_lf(*raw_input_c)) { - // Header terminated. - ++raw_input_c; + // Looking for a field and found a CR, which should mean terminating the header. + if (ParseRules::is_lf(*text)) { + ++text; zret = PARSE_RESULT_DONE; } else { - // This really should be an error (spec doesn't permit lone CR) - // but the regression tests require it. - mime_scanner_append(S, &RAW_CR, 1); - S->m_state = MIME_PARSE_INSIDE; + // This really should be an error (spec doesn't permit lone CR) but the regression tests + // require it. + this->append({&RAW_CR, 1}); + m_state = MIME_PARSE_INSIDE; } break; - case MIME_PARSE_INSIDE: - lf_ptr = static_cast<const char *>(memchr(raw_input_c, ParseRules::CHAR_LF, runway)); - if (lf_ptr) { - raw_input_c = lf_ptr + 1; - if (MIME_SCANNER_TYPE_LINE == raw_input_scan_type) { - zret = PARSE_RESULT_OK; - S->m_state = MIME_PARSE_BEFORE; + case MIME_PARSE_INSIDE: { + auto lf_off = text.find(ParseRules::CHAR_LF); + if (lf_off != TextView::npos) { + text.remove_prefix(lf_off + 1); // drop up to and including LF + if (LINE == scan_type) { + zret = PARSE_RESULT_OK; + m_state = MIME_PARSE_BEFORE; } else { - S->m_state = MIME_PARSE_AFTER; + m_state = MIME_PARSE_AFTER; // looking for line folding. } - } else { - raw_input_c = raw_input_e; // grab all that's available. + } else { // no EOL, consume all text without changing state. + text.remove_prefix(text.size()); } - break; + } break; case MIME_PARSE_AFTER: - // After a LF. Might be the end or a continuation. - if (ParseRules::is_ws(*raw_input_c)) { - char *unfold = const_cast<char *>(raw_input_c - 1); - - *unfold-- = ' '; + // After a LF, the next line might be a continuation / folded line. That's indicated by a + // starting whitespace. If that's the case, back up over the preceding CR/LF with space and + // pretend it's the same line. + if (ParseRules::is_ws(*text)) { // folded line. + char *unfold = const_cast<char *>(text.data() - 1); + *unfold-- = ' '; if (ParseRules::is_cr(*unfold)) { *unfold = ' '; } - S->m_state = MIME_PARSE_INSIDE; // back inside the field. + m_state = MIME_PARSE_INSIDE; // back inside the field. } else { - S->m_state = MIME_PARSE_BEFORE; // field terminated. - zret = PARSE_RESULT_OK; + m_state = MIME_PARSE_BEFORE; // field terminated. + zret = PARSE_RESULT_OK; } break; } } - ptrdiff_t data_size = raw_input_c - *raw_input_s; + TextView parsed_text{input.data(), text.data()}; + bool save_parsed_text_p = !parsed_text.empty(); if (PARSE_RESULT_CONT == zret) { - // data ran out before we got a clear final result. - // There a number of things we need to check and possibly adjust - // that result. It's less complex to do this cleanup than handle - // in the parser state machine. - if (raw_input_eof) { + // data ran out before we got a clear final result. There a number of things we need to check + // and possibly adjust that result. It's less complex to do this cleanup than handle all of + // these checks in the parser state machine. + if (eof_p) { // Should never return PARSE_CONT if we've hit EOF. - if (0 == data_size) { + if (parsed_text.empty()) { // all input previously consumed. If we're between fields, that's cool. - if (MIME_PARSE_INSIDE != S->m_state) { - S->m_state = MIME_PARSE_BEFORE; // probably not needed... - zret = PARSE_RESULT_DONE; + if (MIME_PARSE_INSIDE != m_state) { + m_state = MIME_PARSE_BEFORE; // probably not needed... + zret = PARSE_RESULT_DONE; } else { zret = PARSE_RESULT_ERROR; // unterminated field. } - } else if (MIME_PARSE_AFTER == S->m_state) { - // Special case it seems - need to accept the final field - // even if there's no header terminating CR LF. We check for - // absolute end of input because otherwise this might be - // a multiline field where we haven't seen the next leading space. - S->m_state = MIME_PARSE_BEFORE; - zret = PARSE_RESULT_OK; + } else if (MIME_PARSE_AFTER == m_state) { + // Special case it seems - need to accept the final field even if there's no header + // terminating CR LF. This is only reasonable after absolute end of input (EOF) because + // otherwise this might be a multiline field where we haven't seen the next leading space. + m_state = MIME_PARSE_BEFORE; + zret = PARSE_RESULT_OK; } else { // Partial input, no field / line CR LF zret = PARSE_RESULT_ERROR; // Unterminated field. } - } else if (data_size) { - if (MIME_PARSE_INSIDE == S->m_state) { + } else if (!parsed_text.empty()) { + if (MIME_PARSE_INSIDE == m_state) { // Inside a field but more data is expected. Save what we've got. - mime_scanner_append(S, *raw_input_s, data_size); - data_size = 0; // Don't append again. - } else if (MIME_PARSE_AFTER == S->m_state) { + this->append(parsed_text); // Do this here to force appending. + save_parsed_text_p = false; // don't double append. + } else if (MIME_PARSE_AFTER == m_state) { // After a field but we still have data. Need to parse it too. - S->m_state = MIME_PARSE_BEFORE; - zret = PARSE_RESULT_OK; + m_state = MIME_PARSE_BEFORE; + zret = PARSE_RESULT_OK; } } } - if (data_size && S->m_line_length) { + if (save_parsed_text_p && !m_line.empty()) { // If we're already accumulating, continue to do so if we have data. - mime_scanner_append(S, *raw_input_s, data_size); + this->append(parsed_text); } - // No sharing if we've accumulated data (really, force this to make compiler shut up). - *output_shares_raw_input = 0 == S->m_line_length; // adjust out arguments. if (PARSE_RESULT_CONT != zret) { - if (0 != S->m_line_length) { - *output_s = S->m_line; - *output_e = *output_s + S->m_line_length; - S->m_line_length = 0; + if (!m_line.empty()) { + output = m_line; + m_line.resize(0); // depending resize(0) not deallocating internal string memory. + output_shares_input = false; } else { - *output_s = *raw_input_s; - *output_e = raw_input_c; + output = parsed_text; + output_shares_input = true; } } - // Make sure there are no '\0' in the input scanned so far - if (zret != PARSE_RESULT_ERROR && memchr(*raw_input_s, '\0', raw_input_c - *raw_input_s) != nullptr) { + // Make sure there are no null characters in the input scanned so far + if (zret != PARSE_RESULT_ERROR && TextView::npos != parsed_text.find('\0')) { zret = PARSE_RESULT_ERROR; } - *raw_input_s = raw_input_c; // mark input consumed. + input.remove_prefix(parsed_text.size()); return zret; } @@ -2528,14 +2476,14 @@ _mime_parser_init(MIMEParser *parser) void mime_parser_init(MIMEParser *parser) { - mime_scanner_init(&parser->m_scanner); + parser->m_scanner.init(); _mime_parser_init(parser); } void mime_parser_clear(MIMEParser *parser) { - mime_scanner_clear(&parser->m_scanner); + parser->m_scanner.clear(); _mime_parser_init(parser); } @@ -2545,17 +2493,6 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, MIMEHdrImpl *mh, const char { ParseResult err; bool line_is_real; - const char *colon; - const char *line_c; - const char *line_s = nullptr; - const char *line_e = nullptr; - const char *field_name_first; - const char *field_name_last; - const char *field_value_first; - const char *field_value_last; - const char *field_line_first; - const char *field_line_last; - int field_name_length, field_value_length; MIMEScanner *scanner = &parser->m_scanner; @@ -2564,22 +2501,23 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, MIMEHdrImpl *mh, const char // get a name:value line, with all continuation lines glued into one line // //////////////////////////////////////////////////////////////////////////// - err = mime_scanner_get(scanner, real_s, real_e, &line_s, &line_e, &line_is_real, eof, MIME_SCANNER_TYPE_FIELD); + TextView text{*real_s, real_e}; + TextView parsed; + err = scanner->get(text, parsed, line_is_real, eof, MIMEScanner::FIELD); + *real_s = text.data(); if (err != PARSE_RESULT_OK) { return err; } - line_c = line_s; - ////////////////////////////////////////////////// // if got a LF or CR on its own, end the header // ////////////////////////////////////////////////// - if ((line_e - line_c >= 2) && (line_c[0] == ParseRules::CHAR_CR) && (line_c[1] == ParseRules::CHAR_LF)) { + if ((parsed.size() >= 2) && (parsed[0] == ParseRules::CHAR_CR) && (parsed[1] == ParseRules::CHAR_LF)) { return PARSE_RESULT_DONE; } - if ((line_e - line_c >= 1) && (line_c[0] == ParseRules::CHAR_LF)) { + if ((parsed.size() >= 1) && (parsed[0] == ParseRules::CHAR_LF)) { return PARSE_RESULT_DONE; } @@ -2587,27 +2525,23 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, MIMEHdrImpl *mh, const char // find pointers into the name:value field // ///////////////////////////////////////////// - field_line_first = line_c; - field_line_last = line_e - 1; - - // find name first - field_name_first = line_c; /** * Fix for INKqa09141. The is_token function fails for '@' character. * Header names starting with '@' signs are valid headers. Hence we * have to add one more check to see if the first parameter is '@' * character then, the header name is valid. **/ - if ((!ParseRules::is_token(*field_name_first)) && (*field_name_first != '@')) { + if ((!ParseRules::is_token(*parsed)) && (*parsed != '@')) { continue; // toss away garbage line } // find name last - colon = (char *)memchr(line_c, ':', (line_e - line_c)); - if (!colon) { + auto field_value = parsed; // need parsed as is later on. + auto field_name = field_value.split_prefix_at(':'); + if (field_name.empty()) { continue; // toss away garbage line } - field_name_last = colon - 1; + // RFC7230 section 3.2.4: // No whitespace is allowed between the header field-name and colon. In // the past, differences in the handling of such whitespace have led to @@ -2615,57 +2549,44 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, MIMEHdrImpl *mh, const char // server MUST reject any received request message that contains // whitespace between a header field-name and colon with a response code // of 400 (Bad Request). - if ((field_name_last >= field_name_first) && is_ws(*field_name_last)) { + if (is_ws(field_name.back())) { return PARSE_RESULT_ERROR; } // find value first - field_value_first = colon + 1; - while ((field_value_first < line_e) && is_ws(*field_value_first)) { - ++field_value_first; - } - - // find_value_last - field_value_last = line_e - 1; - while ((field_value_last >= field_value_first) && ParseRules::is_wslfcr(*field_value_last)) { - --field_value_last; - } - - field_name_length = (int)(field_name_last - field_name_first + 1); - field_value_length = (int)(field_value_last - field_value_first + 1); + field_value.ltrim_if(&ParseRules::is_ws); + field_value.rtrim_if(&ParseRules::is_wslfcr); // Make sure the name or value is not longer than 64K - if (field_name_length >= UINT16_MAX || field_value_length >= UINT16_MAX) { + if (field_name.size() >= UINT16_MAX || field_value.size() >= UINT16_MAX) { return PARSE_RESULT_ERROR; } - int total_line_length = (int)(field_line_last - field_line_first + 1); + // int total_line_length = (int)(field_line_last - field_line_first + 1); ////////////////////////////////////////////////////////////////////// // if we can't leave the name & value in the real buffer, copy them // ////////////////////////////////////////////////////////////////////// if (must_copy_strings || (!line_is_real)) { - int length = total_line_length; - char *dup = heap->duplicate_str(field_name_first, length); - intptr_t delta = dup - field_name_first; - - field_name_first += delta; - field_value_first += delta; + char *dup = heap->duplicate_str(parsed.data(), parsed.size()); + intptr_t delta = dup - parsed.data(); + field_name.assign(field_name.data() + delta, field_name.size()); + field_value.assign(field_value.data() + delta, field_value.size()); } /////////////////////// // tokenize the name // /////////////////////// - int field_name_wks_idx = hdrtoken_tokenize(field_name_first, field_name_length); + int field_name_wks_idx = hdrtoken_tokenize(field_name.data(), field_name.size()); /////////////////////////////////////////// // build and insert the new field object // /////////////////////////////////////////// MIMEField *field = mime_field_create(heap, mh); - mime_field_name_value_set(heap, mh, field, field_name_wks_idx, field_name_first, field_name_length, field_value_first, - field_value_length, true, total_line_length, false); + mime_field_name_value_set(heap, mh, field, field_name_wks_idx, field_name.data(), field_name.size(), field_value.data(), + field_value.size(), true, parsed.size(), false); mime_hdr_field_attach(mh, field, 1, nullptr); } } diff --git a/proxy/hdrs/MIME.h b/proxy/hdrs/MIME.h index 5e78834..972af09 100644 --- a/proxy/hdrs/MIME.h +++ b/proxy/hdrs/MIME.h @@ -25,6 +25,7 @@ #include <sys/time.h> #include <string_view> +#include <string> #include "tscore/ink_assert.h" #include "tscore/ink_apidefs.h" @@ -33,6 +34,8 @@ #include "HdrHeap.h" #include "HdrToken.h" +#include "tscpp/util/TextView.h" + /*********************************************************************** * * * Defines * @@ -58,9 +61,6 @@ enum MimeParseState { MIME_PARSE_AFTER, ///< After a field. }; -#define MIME_SCANNER_TYPE_LINE 0 -#define MIME_SCANNER_TYPE_FIELD 1 - /*********************************************************************** * * * Assertions * @@ -283,13 +283,75 @@ struct MIMEHdrImpl : public HdrHeapObjImpl { * * ***********************************************************************/ +/** A pre-parser used to extract MIME "lines" from raw input for further parsing. + * + * This maintains an internal line buffer which is used to keeping content between calls + * when the parse has not yet completed. + * + */ struct MIMEScanner { - char *m_line; // buffered line being built up - int m_line_length; // size of real live data in buffer - int m_line_size; // total allocated size of buffer - MimeParseState m_state; ///< Parsing machine state. + using self_type = MIMEScanner; ///< Self reference type. +public: + /// Type of input scanning. + enum ScanType { + LINE = 0, ///< Scan a single line. + FIELD = 1, ///< Scan with line folding enabled. + }; + + void init(); ///< Pseudo-constructor required by proxy allocation. + void clear(); ///< Pseudo-destructor required by proxy allocation. + + /// @return The size of the internal line buffer. + size_t get_buffered_line_size() const; + + /** Scan @a input for MIME data delimited by CR/LF end of line markers. + * + * @param input [in,out] Text to scan. + * @param output [out] Parsed text from @a input, if any. + * @param output_shares_input [out] Whether @a output is in @a input. + * @param eof_p [in] The source for @a input is done, no more data will ever be available. + * @param scan_type [in] Whether to check for line folding. + * @return The result of scanning. + * + * @a input is updated to remove text that was scanned. @a output is updated to be a view of the + * scanned @a input. This is separate because @a output may be a view of @a input or a view of the + * internal line buffer. Which of these cases obtains is returned in @a output_shares_input. This + * is @c true if @a output is a view of @a input, and @c false if @a output is a view of the + * internal buffer, but is only set if the result is not @c PARSE_RESULT_CONT (that is, it is not + * set until scanning has completed). If @a scan_type is @c FIELD then folded lines are + * accumulated in to a single line stored in the internal buffer. Otherwise the scanning + * terminates at the first CR/LF. + */ + ParseResult get(ts::TextView &input, ts::TextView &output, bool &output_shares_input, bool eof_p, ScanType scan_type); + +protected: + /** Append @a text to the internal buffer. + * + * @param text Text to append. + * @return @a this + * + * A copy of @a text is appended to the internal line buffer. + */ + self_type &append(ts::TextView text); + + static constexpr MimeParseState INITIAL_PARSE_STATE = MIME_PARSE_BEFORE; + std::string m_line; ///< Internally buffered line data for field coalescence. + MimeParseState m_state{INITIAL_PARSE_STATE}; ///< Parsing machine state. }; +inline size_t +MIMEScanner::get_buffered_line_size() const +{ + return m_line.size(); +} + +inline void +MIMEScanner::clear() +{ + m_line.clear(); + m_state = INITIAL_PARSE_STATE; +} + struct MIMEParser { MIMEScanner m_scanner; int32_t m_field; @@ -692,12 +754,6 @@ void mime_field_name_value_set(HdrHeap *heap, MIMEHdrImpl *mh, MIMEField *field, void mime_field_value_append(HdrHeap *heap, MIMEHdrImpl *mh, MIMEField *field, const char *value, int length, bool prepend_comma, const char separator); -void mime_scanner_init(MIMEScanner *scanner); -void mime_scanner_clear(MIMEScanner *scanner); -void mime_scanner_append(MIMEScanner *scanner, const char *data, int data_size); -ParseResult mime_scanner_get(MIMEScanner *S, const char **raw_input_s, const char *raw_input_e, const char **output_s, - const char **output_e, bool *output_shares_raw_input, bool raw_input_eof, int raw_input_scan_type); - void mime_parser_init(MIMEParser *parser); void mime_parser_clear(MIMEParser *parser); ParseResult mime_parser_parse(MIMEParser *parser, HdrHeap *heap, MIMEHdrImpl *mh, const char **real_s, const char *real_e, diff --git a/proxy/hdrs/Makefile.am b/proxy/hdrs/Makefile.am index 172c0d8..75c3386 100644 --- a/proxy/hdrs/Makefile.am +++ b/proxy/hdrs/Makefile.am @@ -82,6 +82,7 @@ test_proxy_hdrs_CPPFLAGS = $(AM_CPPFLAGS)\ test_proxy_hdrs_SOURCES = \ unit_tests/unit_test_main.cc \ + unit_tests/test_Hdrs.cc \ unit_tests/test_HdrUtils.cc test_proxy_hdrs_LDADD = \ diff --git a/proxy/hdrs/unit_tests/test_Hdrs.cc b/proxy/hdrs/unit_tests/test_Hdrs.cc new file mode 100644 index 0000000..a43503a --- /dev/null +++ b/proxy/hdrs/unit_tests/test_Hdrs.cc @@ -0,0 +1,86 @@ +/** @file + + Catch-based unit tests for various header logic. + This replaces the old regression tests in HdrTest.cc. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. + See the NOTICE file distributed with this work for additional information regarding copyright + ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance with the License. You may obtain a + copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software distributed under the License + is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + or implied. See the License for the specific language governing permissions and limitations under + the License. + */ + +#include <string> +#include <cstring> +#include <cctype> +#include <bitset> +#include <initializer_list> +#include <array> +#include <new> + +#include "catch.hpp" + +#include "HTTP.h" + +// replaces test_http_parser_eos_boundary_cases +TEST_CASE("HdrTest", "[proxy][hdrtest]") +{ + struct Test { + ts::TextView msg; + int expected_result; + int expected_bytes_consumed; + }; + static const std::array<Test, 20> tests = {{ + {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26}, + {"GET /index.html HTTP/1.0\r\n\r\n***BODY****", PARSE_RESULT_DONE, 28}, + {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n***BODY****", PARSE_RESULT_DONE, 48}, + {"GET", PARSE_RESULT_ERROR, 3}, + {"GET /index.html", PARSE_RESULT_ERROR, 15}, + {"GET /index.html\r\n", PARSE_RESULT_ERROR, 17}, + {"GET /index.html HTTP/1.0", PARSE_RESULT_ERROR, 24}, + {"GET /index.html HTTP/1.0\r", PARSE_RESULT_ERROR, 25}, + {"GET /index.html HTTP/1.0\n", PARSE_RESULT_DONE, 25}, + {"GET /index.html HTTP/1.0\n\n", PARSE_RESULT_DONE, 26}, + {"GET /index.html HTTP/1.0\r\n\r\n", PARSE_RESULT_DONE, 28}, + {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar", PARSE_RESULT_ERROR, 44}, + {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 45}, + {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE, 46}, + {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n", PARSE_RESULT_DONE, 48}, + {"GET /index.html HTTP/1.0\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 44}, + {"GET /index.html HTTP/1.0\nUser-Agent: foobar\nBoo: foo\n", PARSE_RESULT_DONE, 53}, + {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE, 46}, + {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26}, + {"", PARSE_RESULT_ERROR, 0}, + }}; + + HTTPParser parser; + + http_parser_init(&parser); + + for (auto const &test : tests) { + HTTPHdr req_hdr; + HdrHeap *heap = new_HdrHeap(HDR_HEAP_DEFAULT_SIZE + 64); // extra to prevent proxy allocation. + + req_hdr.create(HTTP_TYPE_REQUEST, heap); + + http_parser_clear(&parser); + + auto start = test.msg.data(); + auto ret = req_hdr.parse_req(&parser, &start, test.msg.data_end(), true); + auto bytes_consumed = start - test.msg.data(); + + REQUIRE(bytes_consumed == test.expected_bytes_consumed); + REQUIRE(ret == test.expected_result); + + req_hdr.destroy(); + } +} diff --git a/proxy/hdrs/unit_tests/unit_test_main.cc b/proxy/hdrs/unit_tests/unit_test_main.cc index 6aed3a6..636c5cc 100644 --- a/proxy/hdrs/unit_tests/unit_test_main.cc +++ b/proxy/hdrs/unit_tests/unit_test_main.cc @@ -21,5 +21,24 @@ limitations under the License. */ -#define CATCH_CONFIG_MAIN +#include "HTTP.h" + +#define CATCH_CONFIG_RUNNER #include "catch.hpp" + +extern int cmd_disable_pfreelist; + +int +main(int argc, char *argv[]) +{ + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + + int result = Catch::Session().run(argc, argv); + + // global clean-up... + + return result; +}