This is an automated email from the ASF dual-hosted git repository.

shinrich pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d7aeb0  traffic_dump: don't dump cookies from the wire (#6586)
4d7aeb0 is described below

commit 4d7aeb09f83e447b2d3112d23bb0a98ba32ca51d
Author: Brian Neradt <brian.ner...@gmail.com>
AuthorDate: Fri Apr 10 15:00:04 2020 -0400

    traffic_dump: don't dump cookies from the wire (#6586)
    
    Cookies are often very sensitive information and should not be logged.
    This change modifies traffic_dump to not dump the actual cookies from
    the wire.
    
    --promiscuous-mode is added in case the user wants to dump sensitive
    headers.
    
    --sensitive-fields is added so the user can treat custom fields as
    sensitive.
    
    Co-authored-by: bneradt <bner...@verizonmedia.com>
---
 doc/admin-guide/plugins/traffic_dump.en.rst        |   6 +-
 plugins/experimental/traffic_dump/traffic_dump.cc  | 140 ++++++++++++++++++++-
 .../pluginTest/traffic_dump/gold/200.gold          |   1 +
 .../pluginTest/traffic_dump/traffic_dump.test.py   |  46 ++++---
 .../pluginTest/traffic_dump/verify_replay.py       |  55 ++++++++
 5 files changed, 226 insertions(+), 22 deletions(-)

diff --git a/doc/admin-guide/plugins/traffic_dump.en.rst 
b/doc/admin-guide/plugins/traffic_dump.en.rst
index 45c7d51..eece215 100644
--- a/doc/admin-guide/plugins/traffic_dump.en.rst
+++ b/doc/admin-guide/plugins/traffic_dump.en.rst
@@ -46,6 +46,10 @@ Plugin Configuration
 
    (`required`) - specifies the max disk usage N bytes (approximate). Traffic 
Dump will stop capturing new sessions once disk usage exceeds this limit.
 
+   .. option:: --sensitive-fields <field1,field2,...,fieldn>
+
+   (`optional`) - a comma-separated list of case-insensitive HTTP field 
names whose values are considered sensitive information. Traffic Dump will not 
dump the incoming field values for any of these fields but will instead dump a 
generic value for them of the same length as the original. If this option is 
not used, a default list of "Cookie,Set-Cookie" is used. Providing this option 
overwrites that default list with whatever values the user provides. Pass a 
quoted empty string as the a [...]
+
 ``traffic_ctl`` <command>
    * ``traffic_ctl plugin msg traffic_dump.sample N`` - changes the sampling 
ratio N as mentioned above.
    * ``traffic_ctl plugin msg traffic_dump.reset`` - resets the disk usage 
counter.
@@ -57,7 +61,7 @@ This format contains traffic data including:
 
 * Each session and transactions in the session.
 * Timestamps.
-* The four headers (ua request, proxy request, origin server response, proxy 
response).
+* The four sets of headers (user agent request, proxy request, origin server 
response, proxy response).
 * The protocol stack for the user agent.
 * The transaction count for the outbound session.
 * The content block sizes.
diff --git a/plugins/experimental/traffic_dump/traffic_dump.cc 
b/plugins/experimental/traffic_dump/traffic_dump.cc
index 31563b6..cfe5b36 100644
--- a/plugins/experimental/traffic_dump/traffic_dump.cc
+++ b/plugins/experimental/traffic_dump/traffic_dump.cc
@@ -32,20 +32,59 @@
 #include <arpa/inet.h>
 #include <netinet/in.h>
 
+#include <algorithm>
 #include <sstream>
 #include <iomanip>
 #include <chrono>
 #include <atomic>
 #include <string>
 #include <string_view>
+#include <unordered_set>
 
 #include "tscore/ts_file.h"
+#include "tscpp/util/TextView.h"
 #include "ts/ts.h"
 
 namespace
 {
 const char *PLUGIN_NAME   = "traffic_dump";
 const std::string closing = "]}]}";
+std::string defaut_sensitive_field_value;
+
+/// Equality predicate for HTTP field names that ignores letter case.
+struct InsensitiveCompare {
+  /// @return true when strcasecmp reports @a lhs and @a rhs as equal.
+  bool
+  operator()(std::string_view lhs, std::string_view rhs) const
+  {
+    const int difference = strcasecmp(lhs, rhs);
+    return difference == 0;
+  }
+};
+
+/// Case-insensitive hash functor for HTTP field names.
+///
+/// Hashes a lowercased copy of the input so that names differing only in
+/// case produce the same hash value.
+struct StringHashByLower {
+public:
+  /// @param[in] str The field name to hash.
+  /// @return The std::hash of the lowercased copy of @a str.
+  size_t
+  operator()(const std::string &str) const
+  {
+    // The destination must already hold str.size() characters: std::transform
+    // writes through the output iterator and does not grow the string.
+    std::string lower(str.size(), '\0');
+    std::transform(str.begin(), str.end(), lower.begin(),
+                   [](unsigned char c) -> unsigned char { return std::tolower(c); });
+    return std::hash<std::string>()(lower);
+  }
+};
+
+/// Fields considered sensitive because they may contain user-private
+/// information. These fields are replaced with auto-generated generic content
+/// by default. To turn off this behavior, the user should pass a quoted empty
+/// string to --sensitive-fields, which leaves the set of sensitive fields empty.
+///
+/// While these are specified with case, they are matched case-insensitively.
+std::unordered_set<std::string, StringHashByLower, InsensitiveCompare> 
default_sensitive_fields = {
+  "Set-Cookie",
+  "Cookie",
+};
+
+/// The set of fields, default and user-specified, that are sensitive and whose
+/// values will be replaced with auto-generated generic content.
+std::unordered_set<std::string, StringHashByLower, InsensitiveCompare> 
sensitive_fields;
 
 ts::file::path log_path{"dump"};               // default log directory
 int s_arg_idx = 0;                             // Session Arg Index to pass on 
session data
@@ -183,10 +222,10 @@ esc_json_out(const char *buf, int64_t len, std::ostream 
&jsonfile)
 
 /// escape_json(): escape chars in a string and returns json string
 std::string
-escape_json(std::string const &s)
+escape_json(std::string_view s)
 {
   std::ostringstream o;
-  esc_json_out(s.c_str(), s.length(), o);
+  esc_json_out(s.data(), s.length(), o);
   return o.str();
 }
 std::string
@@ -205,9 +244,9 @@ json_entry(std::string const &name, const char *value, 
int64_t size)
 
 /// json_entry_array(): Formats to array-style entry i.e. ["field","value"]
 inline std::string
-json_entry_array(const char *name, int name_len, const char *value, int 
value_len)
+json_entry_array(std::string_view name, std::string_view value)
 {
-  return "[\"" + escape_json(name, name_len) + "\", \"" + escape_json(value, 
value_len) + "\"]";
+  return "[\"" + escape_json(name) + "\", \"" + escape_json(value) + "\"]";
 }
 
 /** Remove the scheme prefix from the url.
@@ -236,6 +275,49 @@ write_content_node(int64_t num_body_bytes)
   return std::string(R"(,"content":{"encoding":"plain","size":)" + 
std::to_string(num_body_bytes) + '}');
 }
 
+/** Populate the generic value that is dumped in place of the sensitive field
+ * values seen on the wire.
+ *
+ * The buffer is filled with consecutive 8-character chunks, each one a
+ * zero-padded 7-digit hexadecimal counter followed by a space.
+ */
+void
+initialize_default_sensitive_field()
+{
+  // 128 KB is the maximum size supported for all headers, so this size should
+  // be plenty large for our needs.
+  constexpr size_t default_field_size = 128 * 1024;
+  defaut_sensitive_field_value.resize(default_field_size);
+
+  char *const base = defaut_sensitive_field_value.data();
+  unsigned chunk_index = 0;
+  while (chunk_index * 8u < default_field_size) {
+    // Each call writes 8 characters plus a terminating NUL; the NUL is
+    // overwritten by the next chunk.
+    sprintf(base + chunk_index * 8u, "%07x ", chunk_index);
+    ++chunk_index;
+  }
+}
+
+/** Choose the value to dump for a header field.
+ *
+ * A field whose name is not in the sensitive set keeps its original value.
+ * A sensitive field instead gets a leading slice of the generic value with
+ * the same length as the original, capped at the generic value's size.
+ *
+ * @param[in] name The field name to inspect.
+ * @param[in] original_value The field value to inspect.
+ *
+ * @return The value traffic_dump should dump for the given field.
+ */
+std::string_view
+replace_sensitive_fields(std::string_view name, std::string_view original_value)
+{
+  const bool is_sensitive = sensitive_fields.find(std::string(name)) != sensitive_fields.end();
+  if (!is_sensitive) {
+    return original_value;
+  }
+
+  const auto generic_size = defaut_sensitive_field_value.size();
+  const auto wire_size    = original_value.size();
+  if (wire_size <= generic_size) {
+    return std::string_view{defaut_sensitive_field_value.data(), wire_size};
+  }
+
+  // The wire value is longer than the generic buffer: report it and fall
+  // back to the whole buffer.
+  TSError("[%s] Encountered a sensitive field value larger than our default "
+          "field size. Default size: %zu, incoming field size: %zu",
+          PLUGIN_NAME, generic_size, wire_size);
+  return std::string_view{defaut_sensitive_field_value.data(), generic_size};
+}
+
 /// Read the txn information from TSMBuffer and write the header information.
 /// This function does not write the content node.
 std::string
@@ -302,8 +384,11 @@ write_message_node_no_content(TSMBuffer &buffer, TSMLoc 
&hdr_loc)
     int name_len = 0, value_len = 0;
     // Append to "fields" list if valid value exists
     if ((name = TSMimeHdrFieldNameGet(buffer, hdr_loc, field_loc, &name_len)) 
&& name_len) {
+      std::string_view name_view{name, static_cast<size_t>(name_len)};
       value = TSMimeHdrFieldValueStringGet(buffer, hdr_loc, field_loc, -1, 
&value_len);
-      result += json_entry_array(name, name_len, value, value_len);
+      std::string_view value_view{value, static_cast<size_t>(value_len)};
+      std::string_view new_value = replace_sensitive_fields(name_view, 
value_view);
+      result += json_entry_array(name_view, new_value);
     }
 
     next_field_loc = TSMimeHdrFieldNext(buffer, hdr_loc, field_loc);
@@ -391,6 +476,7 @@ session_txn_handler(TSCont contp, TSEvent event, void 
*edata)
     // Get UUID
     char uuid[TS_CRUUID_STRING_LEN + 1];
     TSAssert(TS_SUCCESS == TSClientRequestUuidGet(txnp, uuid));
+    std::string_view uuid_view{uuid, strnlen(uuid, TS_CRUUID_STRING_LEN)};
 
     // Generate per transaction json records
     if (!ssnData->first) {
@@ -408,7 +494,7 @@ session_txn_handler(TSCont contp, TSEvent event, void 
*edata)
     // The uuid is a header field for each message in the transaction. Use the
     // "all" node to apply to each message.
     std::string_view name = "uuid";
-    txn_info += ",\"all\":{\"headers\":{\"fields\":[" + 
json_entry_array(name.data(), name.size(), uuid, strlen(uuid));
+    txn_info += ",\"all\":{\"headers\":{\"fields\":[" + json_entry_array(name, 
uuid_view);
     txn_info += "]}}";
     ssnData->write_to_disk(txn_info);
     break;
@@ -633,15 +719,38 @@ TSPluginInit(int argc, const char *argv[])
   info.vendor_name   = "Apache Software Foundation";
   info.support_email = "d...@trafficserver.apache.org";
 
+  bool sensitive_fields_were_specified = false;
   /// Commandline options
   static const struct option longopts[] = {{"logdir", required_argument, 
nullptr, 'l'},
                                            {"sample", required_argument, 
nullptr, 's'},
                                            {"limit", required_argument, 
nullptr, 'm'},
+                                           {"sensitive-fields", 
required_argument, nullptr, 'f'},
                                            {nullptr, no_argument, nullptr, 0}};
   int opt                               = 0;
   while (opt >= 0) {
     opt = getopt_long(argc, const_cast<char *const *>(argv), "l:", longopts, 
nullptr);
     switch (opt) {
+    case 'f': {
+      // --sensitive-fields takes a comma-separated list of HTTP fields that
+      // are sensitive.  The field values for these fields will be replaced
+      // with generic traffic_dump generated data.
+      //
+      // If this option is not used, then the default values in
+      // default_sensitive_fields are used. If this option is used, then it
+      // replaces the default sensitive fields with the user-supplied list of
+      // sensitive fields.
+      sensitive_fields_were_specified = true;
+      ts::TextView input_filter_fields{std::string_view{optarg}};
+      ts::TextView filter_field;
+      while (!(filter_field = 
input_filter_fields.take_prefix_at(',')).empty()) {
+        filter_field.trim_if(&isspace);
+        if (filter_field.empty()) {
+          continue;
+        }
+        sensitive_fields.emplace(filter_field);
+      }
+      break;
+    }
     case 'l': {
       log_path = ts::file::path{optarg};
       break;
@@ -664,6 +773,23 @@ TSPluginInit(int argc, const char *argv[])
     }
   }
 
+  if (!sensitive_fields_were_specified) {
+    // The user did not provide their own list of sensitive fields. Use the
+    // default.
+    sensitive_fields.merge(default_sensitive_fields);
+  }
+
+  std::string sensitive_fields_string;
+  bool is_first = true;
+  for (const auto &field : sensitive_fields) {
+    if (!is_first) {
+      sensitive_fields_string += ", ";
+    }
+    is_first = false;
+    sensitive_fields_string += field;
+  }
+  TSDebug(PLUGIN_NAME, "Sensitive fields for which generic values will be 
dumped: %s", sensitive_fields_string.c_str());
+
   // Make absolute path if not
   if (!log_path.is_absolute()) {
     log_path = ts::file::path(TSInstallDirGet()) / log_path;
@@ -675,6 +801,8 @@ TSPluginInit(int argc, const char *argv[])
   } else if (TS_SUCCESS != TSUserArgIndexReserve(TS_USER_ARGS_SSN, 
PLUGIN_NAME, "Track log related data", &s_arg_idx)) {
     TSError("[%s] Unable to initialize plugin (disabled). Failed to reserve 
ssn arg.", PLUGIN_NAME);
   } else {
+    initialize_default_sensitive_field();
+
     /// Add global hooks
     TSCont ssncont = TSContCreate(global_ssn_handler, nullptr);
     TSHttpHookAdd(TS_HTTP_SSN_START_HOOK, ssncont);
diff --git a/tests/gold_tests/pluginTest/traffic_dump/gold/200.gold 
b/tests/gold_tests/pluginTest/traffic_dump/gold/200.gold
index 71aa5d5..0ac91ba 100644
--- a/tests/gold_tests/pluginTest/traffic_dump/gold/200.gold
+++ b/tests/gold_tests/pluginTest/traffic_dump/gold/200.gold
@@ -6,6 +6,7 @@
 ``
 < HTTP/1.1 200 OK
 < Content-Length: 0
+< Set-Cookie: classified_not_for_logging
 < Date: ``
 < Age: ``
 < Server: ATS/``
diff --git a/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py 
b/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py
index afb75db..393c124 100644
--- a/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py
+++ b/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py
@@ -33,14 +33,16 @@ request_header = {"headers": "GET / HTTP/1.1\r\n"
                   "Host: www.example.com\r\nContent-Length: 0\r\n\r\n",
                   "timestamp": "1469733493.993", "body": ""}
 response_header = {"headers": "HTTP/1.1 200 OK"
-                   "\r\nConnection: close\r\nContent-Length: 0\r\n\r\n",
+                   "\r\nConnection: close\r\nContent-Length: 0"
+                   "\r\nSet-Cookie: classified_not_for_logging\r\n\r\n",
                    "timestamp": "1469733493.993", "body": ""}
 server.addResponse("sessionfile.log", request_header, response_header)
 request_header = {"headers": "GET /one HTTP/1.1\r\n"
                   "Host: www.example.com\r\nContent-Length: 0\r\n\r\n",
                   "timestamp": "1469733493.993", "body": ""}
 response_header = {"headers": "HTTP/1.1 200 OK"
-                   "\r\nConnection: close\r\nContent-Length: 0\r\n\r\n",
+                   "\r\nConnection: close\r\nContent-Length: 0"
+                   "\r\nSet-Cookie: classified_not_for_logging\r\n\r\n",
                    "timestamp": "1469733493.993", "body": ""}
 server.addResponse("sessionfile.log", request_header, response_header)
 request_header = {"headers": "GET /post_with_body HTTP/1.1\r\n"
@@ -63,7 +65,8 @@ ts.Disk.remap_config.AddLine(
 )
 # Configure traffic_dump.
 ts.Disk.plugin_config.AddLine(
-    'traffic_dump.so --logdir {0} --sample 1 --limit 
1000000000'.format(replay_dir)
+    'traffic_dump.so --logdir {0} --sample 1 --limit 1000000000 '
+    '--sensitive-fields 
"cookie,set-cookie,x-request-1,x-request-2"'.format(replay_dir)
 )
 
 # Set up trafficserver expectations.
@@ -99,8 +102,10 @@ tr = Test.AddTestRun("First transaction")
 
 tr.Processes.Default.StartBefore(server, 
ready=When.PortOpen(server.Variables.Port))
 tr.Processes.Default.StartBefore(Test.Processes.ts)
-tr.Processes.Default.Command = 'curl http://127.0.0.1:{0} -H\'Host: 
www.example.com\' --verbose'.format(
-    ts.Variables.port)
+tr.Processes.Default.Command = \
+        ('curl http://127.0.0.1:{0} -H"Cookie: donotlogthis" '
+         '-H"Host: www.example.com" -H"X-Request-1: ultra_sensitive" 
--verbose'.format(
+             ts.Variables.port))
 tr.Processes.Default.ReturnCode = 0
 tr.Processes.Default.Streams.stderr = "gold/200.gold"
 tr.StillRunningAfter = server
@@ -108,8 +113,10 @@ tr.StillRunningAfter = ts
 
 # Execute the second transaction.
 tr = Test.AddTestRun("Second transaction")
-tr.Processes.Default.Command = 'curl http://127.0.0.1:{0}/one -H\'Host: 
www.example.com\' --verbose'.format(
-    ts.Variables.port)
+tr.Processes.Default.Command = \
+        ('curl http://127.0.0.1:{0}/one -H"Host: www.example.com" '
+         '-H"X-Request-2: also_very_sensitive" --verbose'.format(
+            ts.Variables.port))
 tr.Processes.Default.ReturnCode = 0
 tr.Processes.Default.Streams.stderr = "gold/200.gold"
 tr.StillRunningAfter = server
@@ -118,11 +125,17 @@ tr.StillRunningAfter = ts
 # Verify the properties of the replay file for the first transaction.
 tr = Test.AddTestRun("Verify the json content of the first session")
 verify_replay = "verify_replay.py"
+sensitive_fields_arg = (
+        "--sensitive-fields cookie "
+        "--sensitive-fields set-cookie "
+        "--sensitive-fields x-request-1 "
+        "--sensitive-fields x-request-2 ")
 tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
-tr.Processes.Default.Command = "python3 {0} {1} {2}".format(
+tr.Processes.Default.Command = "python3 {0} {1} {2} {3}".format(
         verify_replay,
         os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 
'replay_schema.json'),
-        replay_file_session_1)
+        replay_file_session_1,
+        sensitive_fields_arg)
 tr.Processes.Default.ReturnCode = 0
 tr.StillRunningAfter = server
 tr.StillRunningAfter = ts
@@ -130,10 +143,11 @@ tr.StillRunningAfter = ts
 # Verify the properties of the replay file for the second transaction.
 tr = Test.AddTestRun("Verify the json content of the second session")
 tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
-tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target 
'/one'".format(
+tr.Processes.Default.Command = "python3 {0} {1} {2} {3} --request-target 
'/one'".format(
         verify_replay,
         os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 
'replay_schema.json'),
-        replay_file_session_2)
+        replay_file_session_2,
+        sensitive_fields_arg)
 tr.Processes.Default.ReturnCode = 0
 tr.StillRunningAfter = server
 tr.StillRunningAfter = ts
@@ -147,7 +161,7 @@ tr = Test.AddTestRun("Make a request with an explicit 
target.")
 request_target = "http://localhost:{0}/candy".format(ts.Variables.port)
 tr.Processes.Default.Command = (
         'curl --request-target "{0}" '
-        'http://127.0.0.1:{1} -H\'Host: www.example.com\' --verbose'.format(
+        'http://127.0.0.1:{1} -H"Host: www.example.com" --verbose'.format(
             request_target, ts.Variables.port))
 tr.Processes.Default.ReturnCode = 0
 tr.Processes.Default.Streams.stderr = "gold/explicit_target.gold"
@@ -157,10 +171,11 @@ tr.StillRunningAfter = ts
 tr = Test.AddTestRun("Verify the replay file has the explicit target.")
 tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
 
-tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target 
'{3}'".format(
+tr.Processes.Default.Command = "python3 {0} {1} {2} {3} --request-target 
'{4}'".format(
         verify_replay,
         os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 
'replay_schema.json'),
         replay_file_session_3,
+        sensitive_fields_arg,
         request_target)
 tr.Processes.Default.ReturnCode = 0
 tr.StillRunningAfter = server
@@ -178,7 +193,7 @@ request_target = 
"http://localhost:{0}/post_with_body".format(ts.Variables.port)
 # in the test run directory.
 tr.Processes.Default.Command = (
         'curl --data-binary @{0} --request-target "{1}" '
-        'http://127.0.0.1:{2} -H\'Host: www.example.com\' --verbose'.format(
+        'http://127.0.0.1:{2} -H"Host: www.example.com" --verbose'.format(
             verify_replay, request_target, ts.Variables.port))
 tr.Processes.Default.ReturnCode = 0
 tr.Processes.Default.Streams.stderr = "gold/post_with_body.gold"
@@ -190,10 +205,11 @@ tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
 
 size_of_verify_replay_file = os.path.getsize(os.path.join(Test.TestDirectory, 
verify_replay))
 tr.Processes.Default.Command = \
-        "python3 {0} {1} {2} --client-request-size {3}".format(
+        "python3 {0} {1} {2} {3} --client-request-size {4}".format(
             verify_replay,
             os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 
'replay_schema.json'),
             replay_file_session_4,
+            sensitive_fields_arg,
             size_of_verify_replay_file)
 tr.Processes.Default.ReturnCode = 0
 tr.StillRunningAfter = server
diff --git a/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py 
b/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py
index 532c92b..39d5987 100644
--- a/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py
+++ b/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py
@@ -23,6 +23,28 @@ import jsonschema
 import sys
 
 
+expected_sensitive_value = \
+    '''0000000 0000001 0000002 0000003 0000004 0000005 0000006 0000007 0000008 
0000009 000000a 000000b 000000c 000000d ''' \
+    '''000000e 000000f 0000010 0000011 0000012 0000013 0000014 0000015 0000016 
0000017 0000018 0000019 000001a 000001b ''' \
+    '''000001c 000001d 000001e 000001f 0000020 0000021 0000022 0000023 0000024 
0000025 0000026 0000027 0000028 0000029 ''' \
+    '''000002a 000002b 000002c 000002d 000002e 000002f 0000030 0000031 0000032 
0000033 0000034 0000035 0000036 0000037 ''' \
+    '''0000038 0000039 000003a 000003b 000003c 000003d 000003e 000003f 0000040 
0000041 0000042 0000043 0000044 0000045 ''' \
+    '''0000046 0000047 0000048 0000049 000004a 000004b 000004c 000004d 000004e 
000004f 0000050 0000051 0000052 0000053 ''' \
+    '''0000054 0000055 0000056 0000057 0000058 0000059 000005a 000005b 000005c 
000005d 000005e 000005f 0000060 0000061 ''' \
+    '''0000062 0000063 0000064 0000065 0000066 0000067 0000068 0000069 000006a 
000006b 000006c 000006d 000006e 000006f ''' \
+    '''0000070 0000071 0000072 0000073 0000074 0000075 0000076 0000077 0000078 
0000079 000007a 000007b 000007c 000007d ''' \
+    '''000007e 000007f 0000080 0000081 0000082 0000083 0000084 0000085 0000086 
0000087 0000088 0000089 000008a 000008b ''' \
+    '''000008c 000008d 000008e 000008f 0000090 0000091 0000092 0000093 0000094 
0000095 0000096 0000097 0000098 0000099 ''' \
+    '''000009a 000009b 000009c 000009d 000009e 000009f 00000a0 00000a1 00000a2 
00000a3 00000a4 00000a5 00000a6 00000a7 ''' \
+    '''00000a8 00000a9 00000aa 00000ab 00000ac 00000ad 00000ae 00000af 00000b0 
00000b1 00000b2 00000b3 00000b4 00000b5 ''' \
+    '''00000b6 00000b7 00000b8 00000b9 00000ba 00000bb 00000bc 00000bd 00000be 
00000bf 00000c0 00000c1 00000c2 00000c3 ''' \
+    '''00000c4 00000c5 00000c6 00000c7 00000c8 00000c9 00000ca 00000cb 00000cc 
00000cd 00000ce 00000cf 00000d0 00000d1 ''' \
+    '''00000d2 00000d3 00000d4 00000d5 00000d6 00000d7 00000d8 00000d9 00000da 
00000db 00000dc 00000dd 00000de 00000df ''' \
+    '''00000e0 00000e1 00000e2 00000e3 00000e4 00000e5 00000e6 00000e7 00000e8 
00000e9 00000ea 00000eb 00000ec 00000ed ''' \
+    '''00000ee 00000ef 00000f0 00000f1 00000f2 00000f3 00000f4 00000f5 00000f6 
00000f7 00000f8 00000f9 00000fa 00000fb ''' \
+    '''00000fc 00000fd 00000fe 00000ff'''
+
+
 def validate_json(schema_json, replay_json):
     """
     Validate the replay file against the provided schema.
@@ -90,6 +112,33 @@ def verify_client_request_size(replay_json, 
client_request_size):
     return True
 
 
+def verify_sensitive_fields_not_dumped(replay_json, sensitive_fields):
+    """
+    Verify that no sensitive field value from the wire was dumped.
+
+    Every header field in the replay file whose lower-cased name is in
+    sensitive_fields must hold a leading portion of the generic
+    expected_sensitive_value instead of its original value.
+
+    Args:
+        replay_json: The parsed replay file content.
+        sensitive_fields: The field names to check; they are compared against
+            the lower-cased field names from the replay file.
+
+    Returns:
+        True if every sensitive field holds the generic value, False if one
+        does not or if the expected nodes are missing from the replay file.
+    """
+    message_types = ['client-request', 'proxy-request', 'server-response', 'proxy-response']
+    try:
+        for session in replay_json['sessions']:
+            for transaction in session['transactions']:
+                for message_type in transaction:
+                    if message_type not in message_types:
+                        continue
+                    message = transaction[message_type]
+                    for field in message['headers']['fields']:
+                        field_name = field[0].lower()
+                        if field_name not in sensitive_fields:
+                            continue
+                        field_value = field[1]
+                        # The generic value is dumped truncated to the original
+                        # length, so it must be a prefix of the expected text. A
+                        # substring test would also accept short real values
+                        # such as "a" or "1f".
+                        if not expected_sensitive_value.startswith(field_value):
+                            print("Found an unexpected cookie: {}: {}".format(field[0], field[1]))
+                            return False
+
+    except KeyError:
+        print("Could not find headers in the replay file.")
+        return False
+
+    return True
+
+
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("schema_file",
@@ -103,6 +152,9 @@ def parse_args():
     parser.add_argument("--client-request-size",
                         type=int,
                         help="The expected size value in the client-request 
node.")
+    parser.add_argument("--sensitive-fields",
+                        action="append",
+                        help="The fields that are considered sensitive and 
replaced with insensitive values.")
     return parser.parse_args()
 
 
@@ -133,6 +185,9 @@ def main():
     if args.client_request_size and not 
verify_client_request_size(replay_json, args.client_request_size):
         return 1
 
+    if args.sensitive_fields and not 
verify_sensitive_fields_not_dumped(replay_json, args.sensitive_fields):
+        return 1
+
     return 0
 
 

Reply via email to