[trafficserver] branch master updated: MIMEScanner: Make MIMEScanner a class, not a POD with free functions.

amc Mon, 25 Feb 2019 10:55:10 -0800

This is an automated email from the ASF dual-hosted git repository.

amc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git



The following commit(s) were added to refs/heads/master by this push:
     new 0be4e73  MIMEScanner: Make MIMEScanner a class, not a POD with free 
functions.
0be4e73 is described below

commit 0be4e7326b89670dea1ffe4c7f21c36bfd2bdf59
Author: Alan M. Carroll <a...@apache.org>
AuthorDate: Wed Feb 6 11:10:22 2019 -0600

    MIMEScanner: Make MIMEScanner a class, not a POD with free functions.
---
 proxy/hdrs/HTTP.cc                      |  27 +--
 proxy/hdrs/HdrTest.cc                   |  79 ---------
 proxy/hdrs/HdrTest.h                    |   1 -
 proxy/hdrs/MIME.cc                      | 303 ++++++++++++--------------------
 proxy/hdrs/MIME.h                       |  82 +++++++--
 proxy/hdrs/Makefile.am                  |   1 +
 proxy/hdrs/unit_tests/test_Hdrs.cc      |  86 +++++++++
 proxy/hdrs/unit_tests/unit_test_main.cc |  21 ++-
 8 files changed, 305 insertions(+), 295 deletions(-)

diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc
index 41bf41b..eebf4ad 100644
--- a/proxy/hdrs/HTTP.cc
+++ b/proxy/hdrs/HTTP.cc
@@ -894,17 +894,21 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, 
HTTPHdrImpl *hh, const
     const char *version_start;
     const char *version_end;
 
+    ts::TextView text, parsed;
+
     real_end = end;
 
   start:
     hh->m_polarity = HTTP_TYPE_REQUEST;
 
     // Make sure the line is not longer than 64K
-    if (scanner->m_line_length >= UINT16_MAX) {
+    if (scanner->get_buffered_line_size() >= UINT16_MAX) {
       return PARSE_RESULT_ERROR;
     }
 
-    err = mime_scanner_get(scanner, start, real_end, &line_start, &end, 
&line_is_real, eof, MIME_SCANNER_TYPE_LINE);
+    text.assign(*start, real_end);
+    err    = scanner->get(text, parsed, line_is_real, eof, MIMEScanner::LINE);
+    *start = text.data();
     if (err < 0) {
       return err;
     }
@@ -917,9 +921,9 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, 
HTTPHdrImpl *hh, const
       return err;
     }
 
-    cur = line_start;
-    ink_assert((end - cur) >= 0);
-    ink_assert((end - cur) < UINT16_MAX);
+    ink_assert(parsed.size() < UINT16_MAX);
+    line_start = cur = parsed.data();
+    end              = parsed.data_end();
 
     must_copy_strings = (must_copy_strings || (!line_is_real));
 
@@ -1244,11 +1248,14 @@ http_parser_parse_resp(HTTPParser *parser, HdrHeap 
*heap, HTTPHdrImpl *hh, const
     hh->m_polarity = HTTP_TYPE_RESPONSE;
 
     // Make sure the line is not longer than 64K
-    if (scanner->m_line_length >= UINT16_MAX) {
+    if (scanner->get_buffered_line_size() >= UINT16_MAX) {
       return PARSE_RESULT_ERROR;
     }
 
-    err = mime_scanner_get(scanner, start, real_end, &line_start, &end, 
&line_is_real, eof, MIME_SCANNER_TYPE_LINE);
+    ts::TextView text{*start, real_end};
+    ts::TextView parsed;
+    err    = scanner->get(text, parsed, line_is_real, eof, MIMEScanner::LINE);
+    *start = text.data();
     if (err < 0) {
       return err;
     }
@@ -1256,9 +1263,9 @@ http_parser_parse_resp(HTTPParser *parser, HdrHeap *heap, 
HTTPHdrImpl *hh, const
       return err;
     }
 
-    cur = line_start;
-    ink_assert((end - cur) >= 0);
-    ink_assert((end - cur) < UINT16_MAX);
+    ink_assert(parsed.size() < UINT16_MAX);
+    line_start = cur = parsed.data();
+    end              = parsed.data_end();
 
     must_copy_strings = (must_copy_strings || (!line_is_real));
 
diff --git a/proxy/hdrs/HdrTest.cc b/proxy/hdrs/HdrTest.cc
index ee3f93d..875e4e1 100644
--- a/proxy/hdrs/HdrTest.cc
+++ b/proxy/hdrs/HdrTest.cc
@@ -74,7 +74,6 @@ HdrTest::go(RegressionTest *t, int /* atype ATS_UNUSED */)
   status = status & test_url();
   status = status & test_arena();
   status = status & test_regex();
-  status = status & test_http_parser_eos_boundary_cases();
   status = status & test_http_mutation();
   status = status & test_mime();
   status = status & test_http();
@@ -521,84 +520,6 @@ HdrTest::test_mime()
   -------------------------------------------------------------------------*/
 
 int
-HdrTest::test_http_parser_eos_boundary_cases()
-{
-  struct {
-    const char *msg;
-    int expected_result;
-    int expected_bytes_consumed;
-  } tests[] = {
-    {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26},
-    {"GET /index.html HTTP/1.0\r\n\r\n***BODY****", PARSE_RESULT_DONE, 28},
-    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n***BODY****", 
PARSE_RESULT_DONE, 48},
-    {"GET", PARSE_RESULT_ERROR, 3},
-    {"GET /index.html", PARSE_RESULT_ERROR, 15},
-    {"GET /index.html\r\n", PARSE_RESULT_ERROR, 17},
-    {"GET /index.html HTTP/1.0", PARSE_RESULT_ERROR, 24},
-    {"GET /index.html HTTP/1.0\r", PARSE_RESULT_ERROR, 25},
-    {"GET /index.html HTTP/1.0\n", PARSE_RESULT_DONE, 25},
-    {"GET /index.html HTTP/1.0\n\n", PARSE_RESULT_DONE, 26},
-    {"GET /index.html HTTP/1.0\r\n\r\n", PARSE_RESULT_DONE, 28},
-    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar", PARSE_RESULT_ERROR, 44},
-    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 
45},
-    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE, 
46},
-    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n", 
PARSE_RESULT_DONE, 48},
-    {"GET /index.html HTTP/1.0\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 44},
-    {"GET /index.html HTTP/1.0\nUser-Agent: foobar\nBoo: foo\n", 
PARSE_RESULT_DONE, 53},
-    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE, 
46},
-    {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26},
-    {"", PARSE_RESULT_ERROR, 0},
-    {nullptr, 0, 0},
-  };
-
-  int i, ret, bytes_consumed;
-  const char *orig_start;
-  const char *start;
-  const char *end;
-  HTTPParser parser;
-
-  int failures = 0;
-
-  bri_box("test_http_parser_eos_boundary_cases");
-
-  http_parser_init(&parser);
-
-  for (i = 0; tests[i].msg != nullptr; i++) {
-    HTTPHdr req_hdr;
-
-    start = tests[i].msg;
-    end   = start + strlen(start); // 1 character past end of string
-
-    req_hdr.create(HTTP_TYPE_REQUEST);
-
-    http_parser_clear(&parser);
-
-    orig_start     = start;
-    ret            = req_hdr.parse_req(&parser, &start, end, true);
-    bytes_consumed = (int)(start - orig_start);
-
-    printf("======== test %d (length=%d, consumed=%d)\n", i, 
(int)strlen(tests[i].msg), bytes_consumed);
-    printf("[%s]\n", tests[i].msg);
-    printf("\n[");
-    req_hdr.print(nullptr, 0, nullptr, nullptr);
-    printf("]\n\n");
-
-    if ((ret != tests[i].expected_result) || (bytes_consumed != 
tests[i].expected_bytes_consumed)) {
-      ++failures;
-      printf("FAILED: test %d: retval <expected %d, got %d>, eaten <expected 
%d, got %d>\n\n", i, tests[i].expected_result, ret,
-             tests[i].expected_bytes_consumed, bytes_consumed);
-    }
-
-    req_hdr.destroy();
-  }
-
-  return (failures_to_status("test_http_parser_eos_boundary_cases", failures));
-}
-
-/*-------------------------------------------------------------------------
-  -------------------------------------------------------------------------*/
-
-int
 HdrTest::test_http_aux(const char *request, const char *response)
 {
   int err;
diff --git a/proxy/hdrs/HdrTest.h b/proxy/hdrs/HdrTest.h
index 91a979c..7f8dc83 100644
--- a/proxy/hdrs/HdrTest.h
+++ b/proxy/hdrs/HdrTest.h
@@ -51,7 +51,6 @@ private:
   int test_parse_date();
   int test_format_date();
   int test_url();
-  int test_http_parser_eos_boundary_cases();
   int test_arena();
   int test_regex();
   int test_accept_language_match();
diff --git a/proxy/hdrs/MIME.cc b/proxy/hdrs/MIME.cc
index 81c4622..d130b20 100644
--- a/proxy/hdrs/MIME.cc
+++ b/proxy/hdrs/MIME.cc
@@ -34,6 +34,8 @@
 #include "HdrUtils.h"
 #include "HttpCompat.h"
 
+using ts::TextView;
+
 /***********************************************************************
  *                                                                     *
  *                    C O M P I L E    O P T I O N S                   *
@@ -2302,215 +2304,161 @@ MIMEHdr::get_host_port_values(const char **host_ptr, 
///< Pointer to host.
  *                          P A R S E R                                *
  *                                                                     *
  ***********************************************************************/
-void
-_mime_scanner_init(MIMEScanner *scanner)
-{
-  scanner->m_line        = nullptr;
-  scanner->m_line_size   = 0;
-  scanner->m_line_length = 0;
-  scanner->m_state       = MIME_PARSE_BEFORE;
-}
 
-//////////////////////////////////////////////////////
-// init     first time structure setup              //
-// clear    resets an already-initialized structure //
-//////////////////////////////////////////////////////
 void
-mime_scanner_init(MIMEScanner *scanner)
+MIMEScanner::init()
 {
-  _mime_scanner_init(scanner);
+  m_state = INITIAL_PARSE_STATE;
+  // Ugly, but required because of how proxy allocation works - that leaves 
the instance in a
+  // random state, so even assigning to it can crash. Because this method 
substitutes for a real
+  // constructor in the proxy allocation system, call the CTOR here. Any 
memory that gets allocated
+  // is supposed to be cleaned up by calling @c clear on this object.
+  new (&m_line) std::string;
 }
 
-// clear is to reset an already initialized structure
-void
-mime_scanner_clear(MIMEScanner *scanner)
+MIMEScanner &
+MIMEScanner::append(TextView text)
 {
-  ats_free(scanner->m_line);
-  _mime_scanner_init(scanner);
-}
-
-void
-mime_scanner_append(MIMEScanner *scanner, const char *data, int data_size)
-{
-  int free_size = scanner->m_line_size - scanner->m_line_length;
-
-  //////////////////////////////////////////////////////
-  // if not enough space, allocate or grow the buffer //
-  //////////////////////////////////////////////////////
-  if (data_size > free_size) {       // need to allocate/grow the buffer
-    if (scanner->m_line_size == 0) { // buffer should be at least 128 bytes
-      scanner->m_line_size = 128;
-    }
-
-    while (free_size < data_size) { // grow buffer by powers of 2
-      scanner->m_line_size *= 2;
-      free_size = scanner->m_line_size - scanner->m_line_length;
-    }
-
-    if (scanner->m_line == nullptr) { // if no buffer yet, allocate one
-      scanner->m_line = (char *)ats_malloc(scanner->m_line_size);
-    } else {
-      scanner->m_line = (char *)ats_realloc(scanner->m_line, 
scanner->m_line_size);
-    }
-  }
-  ////////////////////////////////////////////////
-  // append new data onto the end of the buffer //
-  ////////////////////////////////////////////////
-
-  memcpy(&(scanner->m_line[scanner->m_line_length]), data, data_size);
-  scanner->m_line_length += data_size;
+  m_line += text;
+  return *this;
 }
 
 ParseResult
-mime_scanner_get(MIMEScanner *S, const char **raw_input_s, const char 
*raw_input_e, const char **output_s, const char **output_e,
-                 bool *output_shares_raw_input,
-                 bool raw_input_eof, ///< All data has been received for this 
header.
-                 int raw_input_scan_type)
+MIMEScanner::get(TextView &input, TextView &output, bool &output_shares_input, 
bool eof_p, ScanType scan_type)
 {
-  const char *raw_input_c, *lf_ptr;
   ParseResult zret = PARSE_RESULT_CONT;
   // Need this for handling dangling CR.
-  static const char RAW_CR = ParseRules::CHAR_CR;
-
-  ink_assert((raw_input_s != nullptr) && (*raw_input_s != nullptr));
-  ink_assert(raw_input_e != nullptr);
+  static const char RAW_CR{ParseRules::CHAR_CR};
 
-  raw_input_c = *raw_input_s;
-
-  while (PARSE_RESULT_CONT == zret && raw_input_c < raw_input_e) {
-    ptrdiff_t runway = raw_input_e - raw_input_c; // remaining input.
-    switch (S->m_state) {
+  auto text = input;
+  while (PARSE_RESULT_CONT == zret && !text.empty()) {
+    switch (m_state) {
     case MIME_PARSE_BEFORE: // waiting to find a field.
-      if (ParseRules::is_cr(*raw_input_c)) {
-        ++raw_input_c;
-        if (runway >= 2 && ParseRules::is_lf(*raw_input_c)) {
+      if (ParseRules::is_cr(*text)) {
+        ++text;
+        if (!text.empty() && ParseRules::is_lf(*text)) {
           // optimize a bit - this happens >99% of the time after a CR.
-          ++raw_input_c;
+          ++text;
           zret = PARSE_RESULT_DONE;
         } else {
-          S->m_state = MIME_PARSE_FOUND_CR;
+          m_state = MIME_PARSE_FOUND_CR;
         }
-      } else if (ParseRules::is_lf(*raw_input_c)) {
-        ++raw_input_c;
+      } else if (ParseRules::is_lf(*text)) {
+        ++text;
         zret = PARSE_RESULT_DONE; // Required by regression test.
       } else {
         // consume this character in the next state.
-        S->m_state = MIME_PARSE_INSIDE;
+        m_state = MIME_PARSE_INSIDE;
       }
       break;
     case MIME_PARSE_FOUND_CR:
-      // Looking for a field and found a CR, which should mean terminating
-      // the header. Note that we've left the CR in the input so we have
-      // to skip over it.
-      if (ParseRules::is_lf(*raw_input_c)) {
-        // Header terminated.
-        ++raw_input_c;
+      // Looking for a field and found a CR, which should mean terminating the 
header.
+      if (ParseRules::is_lf(*text)) {
+        ++text;
         zret = PARSE_RESULT_DONE;
       } else {
-        // This really should be an error (spec doesn't permit lone CR)
-        // but the regression tests require it.
-        mime_scanner_append(S, &RAW_CR, 1);
-        S->m_state = MIME_PARSE_INSIDE;
+        // This really should be an error (spec doesn't permit lone CR) but 
the regression tests
+        // require it.
+        this->append({&RAW_CR, 1});
+        m_state = MIME_PARSE_INSIDE;
       }
       break;
-    case MIME_PARSE_INSIDE:
-      lf_ptr = static_cast<const char *>(memchr(raw_input_c, 
ParseRules::CHAR_LF, runway));
-      if (lf_ptr) {
-        raw_input_c = lf_ptr + 1;
-        if (MIME_SCANNER_TYPE_LINE == raw_input_scan_type) {
-          zret       = PARSE_RESULT_OK;
-          S->m_state = MIME_PARSE_BEFORE;
+    case MIME_PARSE_INSIDE: {
+      auto lf_off = text.find(ParseRules::CHAR_LF);
+      if (lf_off != TextView::npos) {
+        text.remove_prefix(lf_off + 1); // drop up to and including LF
+        if (LINE == scan_type) {
+          zret    = PARSE_RESULT_OK;
+          m_state = MIME_PARSE_BEFORE;
         } else {
-          S->m_state = MIME_PARSE_AFTER;
+          m_state = MIME_PARSE_AFTER; // looking for line folding.
         }
-      } else {
-        raw_input_c = raw_input_e; // grab all that's available.
+      } else { // no EOL, consume all text without changing state.
+        text.remove_prefix(text.size());
       }
-      break;
+    } break;
     case MIME_PARSE_AFTER:
-      // After a LF. Might be the end or a continuation.
-      if (ParseRules::is_ws(*raw_input_c)) {
-        char *unfold = const_cast<char *>(raw_input_c - 1);
-
-        *unfold-- = ' ';
+      // After a LF, the next line might be a continuation / folded line. 
That's indicated by a
+      // starting whitespace. If that's the case, back up over the preceding 
CR/LF with space and
+      // pretend it's the same line.
+      if (ParseRules::is_ws(*text)) { // folded line.
+        char *unfold = const_cast<char *>(text.data() - 1);
+        *unfold--    = ' ';
         if (ParseRules::is_cr(*unfold)) {
           *unfold = ' ';
         }
-        S->m_state = MIME_PARSE_INSIDE; // back inside the field.
+        m_state = MIME_PARSE_INSIDE; // back inside the field.
       } else {
-        S->m_state = MIME_PARSE_BEFORE; // field terminated.
-        zret       = PARSE_RESULT_OK;
+        m_state = MIME_PARSE_BEFORE; // field terminated.
+        zret    = PARSE_RESULT_OK;
       }
       break;
     }
   }
 
-  ptrdiff_t data_size = raw_input_c - *raw_input_s;
+  TextView parsed_text{input.data(), text.data()};
+  bool save_parsed_text_p = !parsed_text.empty();
 
   if (PARSE_RESULT_CONT == zret) {
-    // data ran out before we got a clear final result.
-    // There a number of things we need to check and possibly adjust
-    // that result. It's less complex to do this cleanup than handle
-    // in the parser state machine.
-    if (raw_input_eof) {
+    // data ran out before we got a clear final result. There a number of 
things we need to check
+    // and possibly adjust that result. It's less complex to do this cleanup 
than handle all of
+    // these checks in the parser state machine.
+    if (eof_p) {
       // Should never return PARSE_CONT if we've hit EOF.
-      if (0 == data_size) {
+      if (parsed_text.empty()) {
         // all input previously consumed. If we're between fields, that's cool.
-        if (MIME_PARSE_INSIDE != S->m_state) {
-          S->m_state = MIME_PARSE_BEFORE; // probably not needed...
-          zret       = PARSE_RESULT_DONE;
+        if (MIME_PARSE_INSIDE != m_state) {
+          m_state = MIME_PARSE_BEFORE; // probably not needed...
+          zret    = PARSE_RESULT_DONE;
         } else {
           zret = PARSE_RESULT_ERROR; // unterminated field.
         }
-      } else if (MIME_PARSE_AFTER == S->m_state) {
-        // Special case it seems - need to accept the final field
-        // even if there's no header terminating CR LF. We check for
-        // absolute end of input because otherwise this might be
-        // a multiline field where we haven't seen the next leading space.
-        S->m_state = MIME_PARSE_BEFORE;
-        zret       = PARSE_RESULT_OK;
+      } else if (MIME_PARSE_AFTER == m_state) {
+        // Special case it seems - need to accept the final field even if 
there's no header
+        // terminating CR LF. This is only reasonable after absolute end of 
input (EOF) because
+        // otherwise this might be a multiline field where we haven't seen the 
next leading space.
+        m_state = MIME_PARSE_BEFORE;
+        zret    = PARSE_RESULT_OK;
       } else {
         // Partial input, no field / line CR LF
         zret = PARSE_RESULT_ERROR; // Unterminated field.
       }
-    } else if (data_size) {
-      if (MIME_PARSE_INSIDE == S->m_state) {
+    } else if (!parsed_text.empty()) {
+      if (MIME_PARSE_INSIDE == m_state) {
         // Inside a field but more data is expected. Save what we've got.
-        mime_scanner_append(S, *raw_input_s, data_size);
-        data_size = 0; // Don't append again.
-      } else if (MIME_PARSE_AFTER == S->m_state) {
+        this->append(parsed_text);  // Do this here to force appending.
+        save_parsed_text_p = false; // don't double append.
+      } else if (MIME_PARSE_AFTER == m_state) {
         // After a field but we still have data. Need to parse it too.
-        S->m_state = MIME_PARSE_BEFORE;
-        zret       = PARSE_RESULT_OK;
+        m_state = MIME_PARSE_BEFORE;
+        zret    = PARSE_RESULT_OK;
       }
     }
   }
 
-  if (data_size && S->m_line_length) {
+  if (save_parsed_text_p && !m_line.empty()) {
     // If we're already accumulating, continue to do so if we have data.
-    mime_scanner_append(S, *raw_input_s, data_size);
+    this->append(parsed_text);
   }
-  // No sharing if we've accumulated data (really, force this to make compiler 
shut up).
-  *output_shares_raw_input = 0 == S->m_line_length;
 
   // adjust out arguments.
   if (PARSE_RESULT_CONT != zret) {
-    if (0 != S->m_line_length) {
-      *output_s        = S->m_line;
-      *output_e        = *output_s + S->m_line_length;
-      S->m_line_length = 0;
+    if (!m_line.empty()) {
+      output = m_line;
+      m_line.resize(0); // depending resize(0) not deallocating internal 
string memory.
+      output_shares_input = false;
     } else {
-      *output_s = *raw_input_s;
-      *output_e = raw_input_c;
+      output              = parsed_text;
+      output_shares_input = true;
     }
   }
 
-  // Make sure there are no '\0' in the input scanned so far
-  if (zret != PARSE_RESULT_ERROR && memchr(*raw_input_s, '\0', raw_input_c - 
*raw_input_s) != nullptr) {
+  // Make sure there are no null characters in the input scanned so far
+  if (zret != PARSE_RESULT_ERROR && TextView::npos != parsed_text.find('\0')) {
     zret = PARSE_RESULT_ERROR;
   }
 
-  *raw_input_s = raw_input_c; // mark input consumed.
+  input.remove_prefix(parsed_text.size());
   return zret;
 }
 
@@ -2528,14 +2476,14 @@ _mime_parser_init(MIMEParser *parser)
 void
 mime_parser_init(MIMEParser *parser)
 {
-  mime_scanner_init(&parser->m_scanner);
+  parser->m_scanner.init();
   _mime_parser_init(parser);
 }
 
 void
 mime_parser_clear(MIMEParser *parser)
 {
-  mime_scanner_clear(&parser->m_scanner);
+  parser->m_scanner.clear();
   _mime_parser_init(parser);
 }
 
@@ -2545,17 +2493,6 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, 
MIMEHdrImpl *mh, const char
 {
   ParseResult err;
   bool line_is_real;
-  const char *colon;
-  const char *line_c;
-  const char *line_s = nullptr;
-  const char *line_e = nullptr;
-  const char *field_name_first;
-  const char *field_name_last;
-  const char *field_value_first;
-  const char *field_value_last;
-  const char *field_line_first;
-  const char *field_line_last;
-  int field_name_length, field_value_length;
 
   MIMEScanner *scanner = &parser->m_scanner;
 
@@ -2564,22 +2501,23 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, 
MIMEHdrImpl *mh, const char
     // get a name:value line, with all continuation lines glued into one line 
//
     
////////////////////////////////////////////////////////////////////////////
 
-    err = mime_scanner_get(scanner, real_s, real_e, &line_s, &line_e, 
&line_is_real, eof, MIME_SCANNER_TYPE_FIELD);
+    TextView text{*real_s, real_e};
+    TextView parsed;
+    err     = scanner->get(text, parsed, line_is_real, eof, 
MIMEScanner::FIELD);
+    *real_s = text.data();
     if (err != PARSE_RESULT_OK) {
       return err;
     }
 
-    line_c = line_s;
-
     //////////////////////////////////////////////////
     // if got a LF or CR on its own, end the header //
     //////////////////////////////////////////////////
 
-    if ((line_e - line_c >= 2) && (line_c[0] == ParseRules::CHAR_CR) && 
(line_c[1] == ParseRules::CHAR_LF)) {
+    if ((parsed.size() >= 2) && (parsed[0] == ParseRules::CHAR_CR) && 
(parsed[1] == ParseRules::CHAR_LF)) {
       return PARSE_RESULT_DONE;
     }
 
-    if ((line_e - line_c >= 1) && (line_c[0] == ParseRules::CHAR_LF)) {
+    if ((parsed.size() >= 1) && (parsed[0] == ParseRules::CHAR_LF)) {
       return PARSE_RESULT_DONE;
     }
 
@@ -2587,27 +2525,23 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, 
MIMEHdrImpl *mh, const char
     // find pointers into the name:value field //
     /////////////////////////////////////////////
 
-    field_line_first = line_c;
-    field_line_last  = line_e - 1;
-
-    // find name first
-    field_name_first = line_c;
     /**
      * Fix for INKqa09141. The is_token function fails for '@' character.
      * Header names starting with '@' signs are valid headers. Hence we
      * have to add one more check to see if the first parameter is '@'
      * character then, the header name is valid.
      **/
-    if ((!ParseRules::is_token(*field_name_first)) && (*field_name_first != 
'@')) {
+    if ((!ParseRules::is_token(*parsed)) && (*parsed != '@')) {
       continue; // toss away garbage line
     }
 
     // find name last
-    colon = (char *)memchr(line_c, ':', (line_e - line_c));
-    if (!colon) {
+    auto field_value = parsed; // need parsed as is later on.
+    auto field_name  = field_value.split_prefix_at(':');
+    if (field_name.empty()) {
       continue; // toss away garbage line
     }
-    field_name_last = colon - 1;
+
     // RFC7230 section 3.2.4:
     // No whitespace is allowed between the header field-name and colon.  In
     // the past, differences in the handling of such whitespace have led to
@@ -2615,57 +2549,44 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, 
MIMEHdrImpl *mh, const char
     // server MUST reject any received request message that contains
     // whitespace between a header field-name and colon with a response code
     // of 400 (Bad Request).
-    if ((field_name_last >= field_name_first) && is_ws(*field_name_last)) {
+    if (is_ws(field_name.back())) {
       return PARSE_RESULT_ERROR;
     }
 
     // find value first
-    field_value_first = colon + 1;
-    while ((field_value_first < line_e) && is_ws(*field_value_first)) {
-      ++field_value_first;
-    }
-
-    // find_value_last
-    field_value_last = line_e - 1;
-    while ((field_value_last >= field_value_first) && 
ParseRules::is_wslfcr(*field_value_last)) {
-      --field_value_last;
-    }
-
-    field_name_length  = (int)(field_name_last - field_name_first + 1);
-    field_value_length = (int)(field_value_last - field_value_first + 1);
+    field_value.ltrim_if(&ParseRules::is_ws);
+    field_value.rtrim_if(&ParseRules::is_wslfcr);
 
     // Make sure the name or value is not longer than 64K
-    if (field_name_length >= UINT16_MAX || field_value_length >= UINT16_MAX) {
+    if (field_name.size() >= UINT16_MAX || field_value.size() >= UINT16_MAX) {
       return PARSE_RESULT_ERROR;
     }
 
-    int total_line_length = (int)(field_line_last - field_line_first + 1);
+    //    int total_line_length = (int)(field_line_last - field_line_first + 
1);
 
     //////////////////////////////////////////////////////////////////////
     // if we can't leave the name & value in the real buffer, copy them //
     //////////////////////////////////////////////////////////////////////
 
     if (must_copy_strings || (!line_is_real)) {
-      int length     = total_line_length;
-      char *dup      = heap->duplicate_str(field_name_first, length);
-      intptr_t delta = dup - field_name_first;
-
-      field_name_first += delta;
-      field_value_first += delta;
+      char *dup      = heap->duplicate_str(parsed.data(), parsed.size());
+      intptr_t delta = dup - parsed.data();
+      field_name.assign(field_name.data() + delta, field_name.size());
+      field_value.assign(field_value.data() + delta, field_value.size());
     }
     ///////////////////////
     // tokenize the name //
     ///////////////////////
 
-    int field_name_wks_idx = hdrtoken_tokenize(field_name_first, 
field_name_length);
+    int field_name_wks_idx = hdrtoken_tokenize(field_name.data(), 
field_name.size());
 
     ///////////////////////////////////////////
     // build and insert the new field object //
     ///////////////////////////////////////////
 
     MIMEField *field = mime_field_create(heap, mh);
-    mime_field_name_value_set(heap, mh, field, field_name_wks_idx, 
field_name_first, field_name_length, field_value_first,
-                              field_value_length, true, total_line_length, 
false);
+    mime_field_name_value_set(heap, mh, field, field_name_wks_idx, 
field_name.data(), field_name.size(), field_value.data(),
+                              field_value.size(), true, parsed.size(), false);
     mime_hdr_field_attach(mh, field, 1, nullptr);
   }
 }
diff --git a/proxy/hdrs/MIME.h b/proxy/hdrs/MIME.h
index 5e78834..972af09 100644
--- a/proxy/hdrs/MIME.h
+++ b/proxy/hdrs/MIME.h
@@ -25,6 +25,7 @@
 
 #include <sys/time.h>
 #include <string_view>
+#include <string>
 
 #include "tscore/ink_assert.h"
 #include "tscore/ink_apidefs.h"
@@ -33,6 +34,8 @@
 #include "HdrHeap.h"
 #include "HdrToken.h"
 
+#include "tscpp/util/TextView.h"
+
 /***********************************************************************
  *                                                                     *
  *                              Defines                                *
@@ -58,9 +61,6 @@ enum MimeParseState {
   MIME_PARSE_AFTER,    ///< After a field.
 };
 
-#define MIME_SCANNER_TYPE_LINE 0
-#define MIME_SCANNER_TYPE_FIELD 1
-
 /***********************************************************************
  *                                                                     *
  *                              Assertions                             *
@@ -283,13 +283,75 @@ struct MIMEHdrImpl : public HdrHeapObjImpl {
  *                                                                     *
  ***********************************************************************/
 
+/** A pre-parser used to extract MIME "lines" from raw input for further 
parsing.
+ *
+ * This maintains an internal line buffer which is used to keeping content 
between calls
+ * when the parse has not yet completed.
+ *
+ */
 struct MIMEScanner {
-  char *m_line;           // buffered line being built up
-  int m_line_length;      // size of real live data in buffer
-  int m_line_size;        // total allocated size of buffer
-  MimeParseState m_state; ///< Parsing machine state.
+  using self_type = MIMEScanner; ///< Self reference type.
+public:
+  /// Type of input scanning.
+  enum ScanType {
+    LINE  = 0, ///< Scan a single line.
+    FIELD = 1, ///< Scan with line folding enabled.
+  };
+
+  void init();  ///< Pseudo-constructor required by proxy allocation.
+  void clear(); ///< Pseudo-destructor required by proxy allocation.
+
+  /// @return The size of the internal line buffer.
+  size_t get_buffered_line_size() const;
+
+  /** Scan @a input for MIME data delimited by CR/LF end of line markers.
+   *
+   * @param input [in,out] Text to scan.
+   * @param output [out] Parsed text from @a input, if any.
+   * @param output_shares_input [out] Whether @a output is in @a input.
+   * @param eof_p [in] The source for @a input is done, no more data will ever 
be available.
+   * @param scan_type [in] Whether to check for line folding.
+   * @return The result of scanning.
+   *
+   * @a input is updated to remove text that was scanned. @a output is updated 
to be a view of the
+   * scanned @a input. This is separate because @a output may be a view of @a 
input or a view of the
+   * internal line buffer. Which of these cases obtains is returned in @a 
output_shares_input. This
+   * is @c true if @a output is a view of @a input, and @c false if @a output 
is a view of the
+   * internal buffer, but is only set if the result is not @c 
PARSE_RESULT_CONT (that is, it is not
+   * set until scanning has completed). If @a scan_type is @c FIELD then 
folded lines are
+   * accumulated in to a single line stored in the internal buffer. Otherwise 
the scanning
+   * terminates at the first CR/LF.
+   */
+  ParseResult get(ts::TextView &input, ts::TextView &output, bool 
&output_shares_input, bool eof_p, ScanType scan_type);
+
+protected:
+  /** Append @a text to the internal buffer.
+   *
+   * @param text Text to append.
+   * @return @a this
+   *
+   * A copy of @a text is appended to the internal line buffer.
+   */
+  self_type &append(ts::TextView text);
+
+  static constexpr MimeParseState INITIAL_PARSE_STATE = MIME_PARSE_BEFORE;
+  std::string m_line;                          ///< Internally buffered line 
data for field coalescence.
+  MimeParseState m_state{INITIAL_PARSE_STATE}; ///< Parsing machine state.
 };
 
+inline size_t
+MIMEScanner::get_buffered_line_size() const
+{
+  return m_line.size();
+}
+
+inline void
+MIMEScanner::clear()
+{
+  m_line.clear();
+  m_state = INITIAL_PARSE_STATE;
+}
+
 struct MIMEParser {
   MIMEScanner m_scanner;
   int32_t m_field;
@@ -692,12 +754,6 @@ void mime_field_name_value_set(HdrHeap *heap, MIMEHdrImpl 
*mh, MIMEField *field,
 void mime_field_value_append(HdrHeap *heap, MIMEHdrImpl *mh, MIMEField *field, 
const char *value, int length, bool prepend_comma,
                              const char separator);
 
-void mime_scanner_init(MIMEScanner *scanner);
-void mime_scanner_clear(MIMEScanner *scanner);
-void mime_scanner_append(MIMEScanner *scanner, const char *data, int 
data_size);
-ParseResult mime_scanner_get(MIMEScanner *S, const char **raw_input_s, const 
char *raw_input_e, const char **output_s,
-                             const char **output_e, bool 
*output_shares_raw_input, bool raw_input_eof, int raw_input_scan_type);
-
 void mime_parser_init(MIMEParser *parser);
 void mime_parser_clear(MIMEParser *parser);
 ParseResult mime_parser_parse(MIMEParser *parser, HdrHeap *heap, MIMEHdrImpl 
*mh, const char **real_s, const char *real_e,
diff --git a/proxy/hdrs/Makefile.am b/proxy/hdrs/Makefile.am
index 172c0d8..75c3386 100644
--- a/proxy/hdrs/Makefile.am
+++ b/proxy/hdrs/Makefile.am
@@ -82,6 +82,7 @@ test_proxy_hdrs_CPPFLAGS = $(AM_CPPFLAGS)\
 
 test_proxy_hdrs_SOURCES = \
        unit_tests/unit_test_main.cc \
+       unit_tests/test_Hdrs.cc \
        unit_tests/test_HdrUtils.cc
 
 test_proxy_hdrs_LDADD = \
diff --git a/proxy/hdrs/unit_tests/test_Hdrs.cc 
b/proxy/hdrs/unit_tests/test_Hdrs.cc
new file mode 100644
index 0000000..a43503a
--- /dev/null
+++ b/proxy/hdrs/unit_tests/test_Hdrs.cc
@@ -0,0 +1,86 @@
+/** @file
+
+   Catch-based unit tests for various header logic.
+   This replaces the old regression tests in HdrTest.cc.
+
+   @section license License
+
+   Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements.
+   See the NOTICE file distributed with this work for additional information 
regarding copyright
+   ownership.  The ASF licenses this file to you under the Apache License, 
Version 2.0 (the
+   "License"); you may not use this file except in compliance with the 
License.  You may obtain a
+   copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software 
distributed under the License
+   is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express
+   or implied. See the License for the specific language governing permissions 
and limitations under
+   the License.
+ */
+
+#include <string>
+#include <cstring>
+#include <cctype>
+#include <bitset>
+#include <initializer_list>
+#include <array>
+#include <new>
+
+#include "catch.hpp"
+
+#include "HTTP.h"
+
+// replaces test_http_parser_eos_boundary_cases
+TEST_CASE("HdrTest", "[proxy][hdrtest]")
+{
+  struct Test {
+    ts::TextView msg;
+    int expected_result;
+    int expected_bytes_consumed;
+  };
+  static const std::array<Test, 20> tests = {{
+    {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26},
+    {"GET /index.html HTTP/1.0\r\n\r\n***BODY****", PARSE_RESULT_DONE, 28},
+    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n***BODY****", 
PARSE_RESULT_DONE, 48},
+    {"GET", PARSE_RESULT_ERROR, 3},
+    {"GET /index.html", PARSE_RESULT_ERROR, 15},
+    {"GET /index.html\r\n", PARSE_RESULT_ERROR, 17},
+    {"GET /index.html HTTP/1.0", PARSE_RESULT_ERROR, 24},
+    {"GET /index.html HTTP/1.0\r", PARSE_RESULT_ERROR, 25},
+    {"GET /index.html HTTP/1.0\n", PARSE_RESULT_DONE, 25},
+    {"GET /index.html HTTP/1.0\n\n", PARSE_RESULT_DONE, 26},
+    {"GET /index.html HTTP/1.0\r\n\r\n", PARSE_RESULT_DONE, 28},
+    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar", PARSE_RESULT_ERROR, 44},
+    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 
45},
+    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE, 
46},
+    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n", 
PARSE_RESULT_DONE, 48},
+    {"GET /index.html HTTP/1.0\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 44},
+    {"GET /index.html HTTP/1.0\nUser-Agent: foobar\nBoo: foo\n", 
PARSE_RESULT_DONE, 53},
+    {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE, 
46},
+    {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26},
+    {"", PARSE_RESULT_ERROR, 0},
+  }};
+
+  HTTPParser parser;
+
+  http_parser_init(&parser);
+
+  for (auto const &test : tests) {
+    HTTPHdr req_hdr;
+    HdrHeap *heap = new_HdrHeap(HDR_HEAP_DEFAULT_SIZE + 64); // extra to 
prevent proxy allocation.
+
+    req_hdr.create(HTTP_TYPE_REQUEST, heap);
+
+    http_parser_clear(&parser);
+
+    auto start          = test.msg.data();
+    auto ret            = req_hdr.parse_req(&parser, &start, 
test.msg.data_end(), true);
+    auto bytes_consumed = start - test.msg.data();
+
+    REQUIRE(bytes_consumed == test.expected_bytes_consumed);
+    REQUIRE(ret == test.expected_result);
+
+    req_hdr.destroy();
+  }
+}
diff --git a/proxy/hdrs/unit_tests/unit_test_main.cc 
b/proxy/hdrs/unit_tests/unit_test_main.cc
index 6aed3a6..636c5cc 100644
--- a/proxy/hdrs/unit_tests/unit_test_main.cc
+++ b/proxy/hdrs/unit_tests/unit_test_main.cc
@@ -21,5 +21,24 @@
   limitations under the License.
  */
 
-#define CATCH_CONFIG_MAIN
+#include "HTTP.h"
+
+#define CATCH_CONFIG_RUNNER
 #include "catch.hpp"
+
+extern int cmd_disable_pfreelist;
+
+int
+main(int argc, char *argv[])
+{
+  // No thread setup, forbid use of thread local allocators.
+  cmd_disable_pfreelist = true;
+  // Get all of the HTTP WKS items populated.
+  http_init();
+
+  int result = Catch::Session().run(argc, argv);
+
+  // global clean-up...
+
+  return result;
+}

[trafficserver] branch master updated: MIMEScanner: Make MIMEScanner a class, not a POD with free functions.

Reply via email to