[ 
https://issues.apache.org/jira/browse/HADOOP-17511?focusedWorklogId=601112&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-601112
 ]

ASF GitHub Bot logged work on HADOOP-17511:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 24/May/21 11:03
            Start Date: 24/May/21 11:03
    Worklog Time Spent: 10m 
      Work Description: steveloughran commented on a change in pull request 
#2807:
URL: https://github.com/apache/hadoop/pull/2807#discussion_r637865440



##########
File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java
##########
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.net.URISyntaxException;
+import java.util.Map;
+import java.util.regex.Matcher;
+
+import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor;
+import org.apache.hadoop.fs.store.audit.AuditSpan;
+import org.apache.hadoop.fs.audit.CommonAuditContext;
+import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import static 
org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig;
+import static 
org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.REFERRER_HEADER_FILTER;
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.*;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.HEADER_REFERRER;
+import static 
org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader.maybeStripWrappedQuotes;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_FILESYSTEM_ID;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_ID;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_OP;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PATH;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PATH2;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PRINCIPAL;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_THREAD0;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_THREAD1;
+import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_TIMESTAMP;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Tests for referrer audit header generation/parsing.
+ */
+public class TestHttpReferrerAuditHeader extends AbstractAuditingTest {
+
+  /**
+   * Logging.
+   */
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestHttpReferrerAuditHeader.class);
+
+  private LoggingAuditor auditor;
+
+  @Before
+  public void setup() throws Exception {
+    super.setup();
+
+    auditor = (LoggingAuditor) getManager().getAuditor();
+  }
+
+  /**
+   * Create the config from {@link AuditTestSupport#loggingAuditConfig()}
+   * and patch in filtering for fields x1, x2, x3.
+   * @return a logging configuration.
+   */
+  protected Configuration createConfig() {
+    final Configuration conf = loggingAuditConfig();
+    conf.set(REFERRER_HEADER_FILTER, "x1, x2, x3");
+    return conf;
+  }
+
+  /**
+   * This verifies that passing a request through the audit manager
+   * causes the http referrer header to be added, that it can
+   * be split to query parameters, and that those parameters match
+   * those of the active wrapped span.
+   */
+  @Test
+  public void testHttpReferrerPatchesTheRequest() throws Throwable {
+    AuditSpan span = span();
+    long ts = span.getTimestamp();
+    GetObjectMetadataRequest request = head();
+    Map<String, String> headers
+        = request.getCustomRequestHeaders();
+    assertThat(headers)
+        .describedAs("Custom headers")
+        .containsKey(HEADER_REFERRER);
+    String header = headers.get(HEADER_REFERRER);
+    LOG.info("Header is {}", header);
+    Map<String, String> params
+        = HttpReferrerAuditHeader.extractQueryParameters(header);
+    assertMapContains(params, PARAM_PRINCIPAL,
+        UserGroupInformation.getCurrentUser().getUserName());
+    assertMapContains(params, PARAM_FILESYSTEM_ID, auditor.getAuditorId());
+    assertMapContains(params, PARAM_OP, OPERATION);
+    assertMapContains(params, PARAM_PATH, PATH_1);
+    assertMapContains(params, PARAM_PATH2, PATH_2);
+    String threadID = CommonAuditContext.currentThreadID();
+    assertMapContains(params, PARAM_THREAD0, threadID);
+    assertMapContains(params, PARAM_THREAD1, threadID);
+    assertMapContains(params, PARAM_ID, span.getSpanId());
+    assertThat(span.getTimestamp())
+        .describedAs("Timestamp of " + span)
+        .isEqualTo(ts);
+
+    assertMapContains(params, PARAM_TIMESTAMP,
+        Long.toString(ts));
+  }
+
+  @Test
+  public void testHeaderComplexPaths() throws Throwable {
+    String p1 = "s3a://dotted.bucket/path: value/subdir";
+    String p2 = "s3a://key/";
+    AuditSpan span = getManager().createSpan(OPERATION, p1, p2);
+    long ts = span.getTimestamp();
+    Map<String, String> params = issueRequestAndExtractParameters();
+    assertMapContains(params, PARAM_PRINCIPAL,
+        UserGroupInformation.getCurrentUser().getUserName());
+    assertMapContains(params, PARAM_FILESYSTEM_ID, auditor.getAuditorId());
+    assertMapContains(params, PARAM_OP, OPERATION);
+    assertMapContains(params, PARAM_PATH, p1);
+    assertMapContains(params, PARAM_PATH2, p2);
+    String threadID = CommonAuditContext.currentThreadID();
+    assertMapContains(params, PARAM_THREAD0, threadID);
+    assertMapContains(params, PARAM_THREAD1, threadID);
+    assertMapContains(params, PARAM_ID, span.getSpanId());
+    assertThat(span.getTimestamp())
+        .describedAs("Timestamp of " + span)
+        .isEqualTo(ts);
+
+    assertMapContains(params, PARAM_TIMESTAMP,
+        Long.toString(ts));
+  }
+
+  /**
+   * Issue a request, then get the header field and parse it to the parameter.
+   * @return map of query params on the referrer header.
+   * @throws URISyntaxException failure to parse the header as a URI.
+   */
+  private Map<String, String> issueRequestAndExtractParameters()
+      throws URISyntaxException {
+    head();
+    return HttpReferrerAuditHeader.extractQueryParameters(
+        auditor.getLastHeader());
+  }
+
+
+  /**
+   * Test that headers are filtered out if configured.
+   */
+  @Test
+  public void testHeaderFiltering() throws Throwable {
+    // add two attributes, x2 will be filtered.
+    AuditSpan span = getManager().createSpan(OPERATION, null, null);
+    auditor.addAttribute("x0", "x0");
+    auditor.addAttribute("x2", "x2");
+    final Map<String, String> params
+        = issueRequestAndExtractParameters();
+    assertThat(params)
+        .doesNotContainKey("x2");
+
+  }
+
+  /**
+   * A real log entry.
+   * This is derived from a real log entry on a test run.
+   * If this needs to be updated, please do it from a real log.
+   * Splitting this up across lines has a tendency to break things, so
+   * be careful making changes.
+   */
+  public static final String SAMPLE_LOG_ENTRY =
+      "183c9826b45486e485693808f38e2c4071004bf5dfd4c3ab210f0a21a4000000"
+          + " bucket-london"
+          + " [13/May/2021:11:26:06 +0000]"
+          + " 109.157.171.174"
+          + " arn:aws:iam::152813717700:user/dev"
+          + " M7ZB7C4RTKXJKTM9"
+          + " REST.PUT.OBJECT"
+          + " fork-0001/test/testParseBrokenCSVFile"
+          + " \"PUT /fork-0001/test/testParseBrokenCSVFile HTTP/1.1\""
+          + " 200"
+          + " -"
+          + " -"
+          + " 794"
+          + " 55"
+          + " 17"
+          + " \"https://audit.example.org/op_create/"
+          + "e8ede3c7-8506-4a43-8268-fe8fcbb510a4-00000278/"
+          + "?op=op_create"
+          + "&p1=fork-0001/test/testParseBrokenCSVFile"
+          + "&pr=alice"
+          + "&ps=2eac5a04-2153-48db-896a-09bc9a2fd132"
+          + "&id=e8ede3c7-8506-4a43-8268-fe8fcbb510a4-00000278&t0=154"
+          + "&fs=e8ede3c7-8506-4a43-8268-fe8fcbb510a4&t1=156&"
+          + "ts=1620905165700\""
+          + " \"Hadoop 3.4.0-SNAPSHOT, java/1.8.0_282 vendor/AdoptOpenJDK\""
+          + " -"
+          + " TrIqtEYGWAwvu0h1N9WJKyoqM0TyHUaY+ZZBwP2yNf2qQp1Z/0="
+          + " SigV4"
+          + " ECDHE-RSA-AES128-GCM-SHA256"
+          + " AuthHeader"
+          + " bucket-london.s3.eu-west-2.amazonaws.com"
+          + " TLSv1.2";
+
+  private static final String DESCRIPTION = String.format(
+      "log entry %s split by %s", SAMPLE_LOG_ENTRY,
+      LOG_ENTRY_PATTERN);
+
+  /**
+   * Match the log entry and validate the results.
+   */
+  @Test
+  public void testMatchAWSLogEntry() throws Throwable {
+
+    LOG.info("Matcher pattern is\n'{}'", LOG_ENTRY_PATTERN);
+    LOG.info("Log entry is\n'{}'", SAMPLE_LOG_ENTRY);
+    final Matcher matcher = LOG_ENTRY_PATTERN.matcher(SAMPLE_LOG_ENTRY);
+
+    // match the pattern against the entire log entry.
+    assertThat(matcher.matches())

Review comment:
       ooh, that was a bug in my assert. Added an ` .isTrue()` at the end. Well 
spotted.
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 601112)
    Time Spent: 20h  (was: 19h 50m)

> Add an Audit plugin point for S3A auditing/context
> --------------------------------------------------
>
>                 Key: HADOOP-17511
>                 URL: https://issues.apache.org/jira/browse/HADOOP-17511
>             Project: Hadoop Common
>          Issue Type: Sub-task
>    Affects Versions: 3.3.1
>            Reporter: Steve Loughran
>            Assignee: Steve Loughran
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 20h
>  Remaining Estimate: 0h
>
> Add a way for auditing tools to correlate S3 object calls with Hadoop FS API 
> calls.
> Initially just to log/forward to an auditing service.
> Later: let us attach them as parameters in S3 requests, such as OpenTracing 
> headers or (my initial idea: http referrer header — where it will get into 
> the log)
> Challenges
> * ensuring the audit span is created for every public entry point. That will 
> have to include those used in s3guard tools, some de facto public APIs
> * and not re-entered for active spans. S3A code must not call back into the 
> FS API points
> * Propagation across worker threads



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

Reply via email to