[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2023-02-20 Thread via GitHub


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r1112667084


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditTool.java:
##
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.nio.file.Files;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+
+import static
+    org.apache.hadoop.fs.s3a.audit.TestS3AAuditLogMergerAndParser.SAMPLE_LOG_ENTRY;
+
+/**
+ * This will implement tests on AuditTool class.
+ */
+public class TestAuditTool extends AbstractS3ATestBase {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestAuditTool.class);
+
+  private final AuditTool auditTool = new AuditTool();
+
+  /**
+   * Sample directories and files to test.
+   */
+  private File sampleFile;
+  private File sampleDir;
+  private File sampleDestDir;
+
+  /**
+   * Testing run method in AuditTool class by passing source and destination
+   * paths.
+   */
+  @Test
+  public void testRun() throws Exception {
+    sampleDir = Files.createTempDirectory("sampleDir").toFile();
+    sampleFile = File.createTempFile("sampleFile", ".txt", sampleDir);
+    try (FileWriter fw = new FileWriter(sampleFile)) {
+      fw.write(SAMPLE_LOG_ENTRY);
+      fw.flush();
+    }
+    sampleDestDir = Files.createTempDirectory("sampleDestDir").toFile();
+    Path logsPath = new Path(sampleDir.toURI());
+    Path destPath = new Path(sampleDestDir.toURI());
+    String[] args = {destPath.toString(), logsPath.toString()};
+    auditTool.run(args);
+    FileSystem fileSystem = destPath.getFileSystem(getConfiguration());
+    RemoteIterator<LocatedFileStatus> listOfDestFiles =
+        fileSystem.listFiles(destPath, true);
+    while (listOfDestFiles.hasNext()) {

Review Comment:
   As suggested in the comment above, I added the expected path in the code and
opened the file using `fileSystem.open(expectedPath)`.
   The test fails at the open call if `AvroData.avro` has not been created, and
passes if the file is present.
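
   For illustration, a minimal sketch of that check, assuming the output file
name `AvroData.avro` under the destination directory; the `expectedPath`
variable, the assertion text and the `FSDataInputStream` import are not from
the PR:

```java
// Hypothetical sketch: verify the avro output by opening it.
// "AvroData.avro" is the file name mentioned above; the rest is illustrative.
Path expectedPath = new Path(destPath, "AvroData.avro");
FileSystem fileSystem = destPath.getFileSystem(getConfiguration());
// open() throws FileNotFoundException (failing the test) if the file was
// never created; otherwise the test can go on to assert on its content.
try (FSDataInputStream in = fileSystem.open(expectedPath)) {
  assertTrue("expected avro data at " + expectedPath, in.read() != -1);
}
```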



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2023-02-20 Thread via GitHub


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r1112667084


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditTool.java:
##
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.nio.file.Files;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+
+import static
+    org.apache.hadoop.fs.s3a.audit.TestS3AAuditLogMergerAndParser.SAMPLE_LOG_ENTRY;
+
+/**
+ * This will implement tests on AuditTool class.
+ */
+public class TestAuditTool extends AbstractS3ATestBase {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestAuditTool.class);
+
+  private final AuditTool auditTool = new AuditTool();
+
+  /**
+   * Sample directories and files to test.
+   */
+  private File sampleFile;
+  private File sampleDir;
+  private File sampleDestDir;
+
+  /**
+   * Testing run method in AuditTool class by passing source and destination
+   * paths.
+   */
+  @Test
+  public void testRun() throws Exception {
+    sampleDir = Files.createTempDirectory("sampleDir").toFile();
+    sampleFile = File.createTempFile("sampleFile", ".txt", sampleDir);
+    try (FileWriter fw = new FileWriter(sampleFile)) {
+      fw.write(SAMPLE_LOG_ENTRY);
+      fw.flush();
+    }
+    sampleDestDir = Files.createTempDirectory("sampleDestDir").toFile();
+    Path logsPath = new Path(sampleDir.toURI());
+    Path destPath = new Path(sampleDestDir.toURI());
+    String[] args = {destPath.toString(), logsPath.toString()};
+    auditTool.run(args);
+    FileSystem fileSystem = destPath.getFileSystem(getConfiguration());
+    RemoteIterator<LocatedFileStatus> listOfDestFiles =
+        fileSystem.listFiles(destPath, true);
+    while (listOfDestFiles.hasNext()) {

Review Comment:
   I added the expected path in the code and opened the file using
`fileSystem.open(expectedPath)`.
   The test fails at the open call if `AvroData.avro` has not been created, and
passes if the file is present.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2023-02-20 Thread via GitHub


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r1112663365


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditTool.java:
##
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.nio.file.Files;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+
+import static
+    org.apache.hadoop.fs.s3a.audit.TestS3AAuditLogMergerAndParser.SAMPLE_LOG_ENTRY;
+
+/**
+ * This will implement tests on AuditTool class.
+ */
+public class TestAuditTool extends AbstractS3ATestBase {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestAuditTool.class);
+
+  private final AuditTool auditTool = new AuditTool();
+
+  /**
+   * Sample directories and files to test.
+   */
+  private File sampleFile;
+  private File sampleDir;
+  private File sampleDestDir;
+
+  /**
+   * Testing run method in AuditTool class by passing source and destination
+   * paths.
+   */
+  @Test
+  public void testRun() throws Exception {
+    sampleDir = Files.createTempDirectory("sampleDir").toFile();
+    sampleFile = File.createTempFile("sampleFile", ".txt", sampleDir);
+    try (FileWriter fw = new FileWriter(sampleFile)) {
+      fw.write(SAMPLE_LOG_ENTRY);
+      fw.flush();
+    }
+    sampleDestDir = Files.createTempDirectory("sampleDestDir").toFile();
+    Path logsPath = new Path(sampleDir.toURI());
+    Path destPath = new Path(sampleDestDir.toURI());
+    String[] args = {destPath.toString(), logsPath.toString()};
+    auditTool.run(args);
+    FileSystem fileSystem = destPath.getFileSystem(getConfiguration());
+    RemoteIterator<LocatedFileStatus> listOfDestFiles =

Review Comment:
   created expected path as suggested



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2023-02-20 Thread via GitHub


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r1112662862


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/mapreduce/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit.mapreduce;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.audit.AvroDataRecord;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.LineRecordReader;
+
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.AWS_LOG_REGEXP_GROUPS;
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.LOG_ENTRY_PATTERN;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  public static final int MAX_LINE_LENGTH = 32000;
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * parseAuditLog method helps in parsing the audit log
+   * into key-value pairs using regular expressions.
+   *
+   * @param singleAuditLog a single audit log from the merged audit log file
+   * @return a map (auditLogMap) containing the key-value pairs of a single audit log
+   */
+  public HashMap<String, String> parseAuditLog(String singleAuditLog) {
+    HashMap<String, String> auditLogMap = new HashMap<>();
+    if (singleAuditLog == null || singleAuditLog.length() == 0) {
+      LOG.debug(
+          "This is an empty string or null string, expected a valid string to parse");
+      return auditLogMap;
+    }
+    final Matcher matcher = LOG_ENTRY_PATTERN.matcher(singleAuditLog);
+    boolean patternMatching = matcher.matches();
+    if (patternMatching) {
+      for (String key : AWS_LOG_REGEXP_GROUPS) {
+        try {
+          final String value = matcher.group(key);
+          auditLogMap.put(key, value);
+        } catch (IllegalStateException e) {
+          LOG.debug(String.valueOf(e));
+        }
+      }
+    }
+    return auditLogMap;
+  }
+
+  /**
+   * parseReferrerHeader method helps in parsing the http referrer header,
+   * which is one of the key-value pairs of an audit log.
+   *
+   * @param referrerHeader the http referrer header of a particular audit log
+   * @return a map containing the key-value pairs of the audit log as well as
+   * those of the referrer header present in it
+   */
+  public HashMap<String, String> parseReferrerHeader(String referrerHeader) {
+    HashMap<String, String> referrerHeaderMap = new HashMap<>();
+    if (referrerHeader == null || referrerHeader.length() == 0) {
+      LOG.debug(
+          "This is an empty string or null string, expected a valid string to parse");
+      return referrerHeaderMap;
+    }
+    int indexOfQuestionMark = referrerHeader.indexOf("?");
+    String httpReferrer = referrerHeader.substring(indexOfQuestionMark + 1,
+        referrerHeader.length() - 1);
+    int lengthOfReferrer = httpReferrer.length();
+    int start = 0;
+    while (start < lengthOfReferrer) {
+      int equals = httpReferrer.indexOf("=", start);
+      // no match : break
+      if (equals == -1) {
+        break;
+      }
+      String key = httpReferrer.substring(start, equals);
+      int end = httpReferrer.indexOf("&", equals);
+      // or end of string
+      if (end == -1) {
+        end = lengthOfReferrer;
+      }
+      String value = httpReferrer.substring(equals + 1, end);
+      referrerHeaderMap.put

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-12-08 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r1043100069


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/mapreduce/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit.mapreduce;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.audit.AvroDataRecord;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.LineRecordReader;
+
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.AWS_LOG_REGEXP_GROUPS;
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.LOG_ENTRY_PATTERN;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  public static final int MAX_LINE_LENGTH = 32000;
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * parseAuditLog method helps in parsing the audit log
+   * into key-value pairs using regular expressions.
+   *
+   * @param singleAuditLog a single audit log from the merged audit log file
+   * @return a map (auditLogMap) containing the key-value pairs of a single audit log
+   */
+  public HashMap<String, String> parseAuditLog(String singleAuditLog) {
+    HashMap<String, String> auditLogMap = new HashMap<>();
+    if (singleAuditLog == null || singleAuditLog.length() == 0) {
+      LOG.debug(
+          "This is an empty string or null string, expected a valid string to parse");
+      return auditLogMap;
+    }
+    final Matcher matcher = LOG_ENTRY_PATTERN.matcher(singleAuditLog);
+    boolean patternMatching = matcher.matches();
+    if (patternMatching) {
+      for (String key : AWS_LOG_REGEXP_GROUPS) {
+        try {
+          final String value = matcher.group(key);
+          auditLogMap.put(key, value);
+        } catch (IllegalStateException e) {
+          LOG.debug(String.valueOf(e));
+        }
+      }
+    }
+    return auditLogMap;
+  }
+
+  /**
+   * parseReferrerHeader method helps in parsing the http referrer header,
+   * which is one of the key-value pairs of an audit log.
+   *
+   * @param referrerHeader the http referrer header of a particular audit log
+   * @return a map containing the key-value pairs of the audit log as well as
+   * those of the referrer header present in it
+   */
+  public HashMap<String, String> parseReferrerHeader(String referrerHeader) {
+    HashMap<String, String> referrerHeaderMap = new HashMap<>();
+    if (referrerHeader == null || referrerHeader.length() == 0) {
+      LOG.debug(
+          "This is an empty string or null string, expected a valid string to parse");
+      return referrerHeaderMap;
+    }
+    int indexOfQuestionMark = referrerHeader.indexOf("?");
+    String httpReferrer = referrerHeader.substring(indexOfQuestionMark + 1,
+        referrerHeader.length() - 1);
+    int lengthOfReferrer = httpReferrer.length();
+    int start = 0;
+    while (start < lengthOfReferrer) {
+      int equals = httpReferrer.indexOf("=", start);
+      // no match : break
+      if (equals == -1) {
+        break;
+      }
+      String key = httpReferrer.substring(start, equals);
+      int end = httpReferrer.indexOf("&", equals);
+      // or end of string
+      if (end == -1) {
+        end = lengthOfReferrer;
+      }
+      String value = httpReferrer.substring(equals + 1, end);
+      referrerHeaderMap.put(

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-12-08 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r1043069282


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.audit.mapreduce.S3AAuditLogMergerAndParser;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static
+    org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_FAIL;
+import static
+    org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a Command Line Interface.
+ * Its functionality is to parse the audit log files
+ * and generate avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final S3AAuditLogMergerAndParser s3AAuditLogMergerAndParser =
+  new S3AAuditLogMergerAndParser();
+
+  /**
+   * Name of this tool: {@value}.
+   */
+  public static final String AUDITTOOL =
+  "org.apache.hadoop.fs.s3a.audit.AuditTool";
+
+  /**
+   * Purpose of this tool: {@value}.
+   */
+  public static final String PURPOSE =
+  "\n\nUSAGE:\nMerge, parse audit log files and convert into avro file "
+  + "for "
+  + "better "
+  + "visualization";
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int FAILURE = EXIT_FAIL;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  private static final String USAGE =
+  "bin/hadoop " + "Class" + " DestinationPath" + " SourcePath" + "\n" +
+  "bin/hadoop " + AUDITTOOL + " s3a://BUCKET" + " s3a://BUCKET" + "\n";
+
+  private PrintWriter out;
+
+  public AuditTool() {
+    super();
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+    return USAGE + PURPOSE;
+  }
+
+  public String getName() {
+    return AUDITTOOL;
+  }
+
+  /**
+   * This run method in AuditTool takes source and destination path of bucket,
+   * and check if there are directories and pass these paths to merge and
+   * parse audit log files.
+   *
+   * @param args argument list
+   * @return SUCCESS i.e, '0', which is an exit code
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    List<String> paths = Arrays.asList(args);
+    if (paths.isEmpty()) {
+      errorln(getUsage());
+      throw invalidArgs("No bucket specified");
+    }
+
+    // Path of audit log files
+    Path logsPath = new Path(paths.get(1));

Review Comment:
   modified
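
   For illustration, a hedged sketch of invoking the tool from Java through
ToolRunner, with the argument order (destination path first, then source path)
taken from the USAGE string quoted above; the bucket paths are placeholders:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.audit.AuditTool;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical invocation sketch; the s3a paths below are placeholders.
public class RunAuditToolExample {
  public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new AuditTool(),
        new String[] {"s3a://dest-bucket/summary", "s3a://source-bucket/logs"});
    System.exit(exitCode);
  }
}
```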



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-17 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r948701538


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/mapreduce/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit.mapreduce;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.audit.AvroDataRecord;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.LineRecordReader;
+
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.AWS_LOG_REGEXP_GROUPS;
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.LOG_ENTRY_PATTERN;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  public static final int MAX_LINE_LENGTH = 32000;
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * parseAuditLog method helps in parsing the audit log
+   * into key-value pairs using regular expressions.
+   *
+   * @param singleAuditLog a single audit log from the merged audit log file
+   * @return a map (auditLogMap) containing the key-value pairs of a single audit log
+   */
+  public HashMap<String, String> parseAuditLog(String singleAuditLog) {
+    HashMap<String, String> auditLogMap = new HashMap<>();
+    if (singleAuditLog == null || singleAuditLog.length() == 0) {
+      LOG.debug(
+          "This is an empty string or null string, expected a valid string to parse");
+      return auditLogMap;
+    }
+    final Matcher matcher = LOG_ENTRY_PATTERN.matcher(singleAuditLog);
+    boolean patternMatching = matcher.matches();
+    if (patternMatching) {
+      for (String key : AWS_LOG_REGEXP_GROUPS) {
+        try {
+          final String value = matcher.group(key);
+          auditLogMap.put(key, value);
+        } catch (IllegalStateException e) {
+          LOG.debug(String.valueOf(e));

Review Comment:
   Yes, this catch block is required, as `matcher.group(key)` throws
`IllegalStateException`.
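
   For context, a small stand-alone illustration (not from the PR) of the case
in which `Matcher.group(name)` raises `IllegalStateException`: the group is
queried before a successful match has been performed.

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Stand-alone demo, not part of the PR.
public class MatcherGroupDemo {
  public static void main(String[] args) {
    Matcher m = Pattern.compile("(?<key>\\w+)").matcher("value");
    // matches() has not been called (or might have returned false), so this
    // call throws IllegalStateException because no match is available yet.
    m.group("key");
  }
}
```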



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-17 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r948693543


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/mapreduce/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit.mapreduce;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.audit.AvroDataRecord;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.LineRecordReader;
+
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.AWS_LOG_REGEXP_GROUPS;
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.LOG_ENTRY_PATTERN;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  public static final int MAX_LINE_LENGTH = 32000;
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * parseAuditLog method helps in parsing the audit log
+   * into key-value pairs using regular expressions.
+   *
+   * @param singleAuditLog a single audit log from the merged audit log file
+   * @return a map (auditLogMap) containing the key-value pairs of a single audit log
+   */
+  public HashMap<String, String> parseAuditLog(String singleAuditLog) {
+    HashMap<String, String> auditLogMap = new HashMap<>();
+    if (singleAuditLog == null || singleAuditLog.length() == 0) {
+      LOG.debug(
+          "This is an empty string or null string, expected a valid string to parse");
+      return auditLogMap;
+    }
+    final Matcher matcher = LOG_ENTRY_PATTERN.matcher(singleAuditLog);
+    boolean patternMatching = matcher.matches();
+    if (patternMatching) {
+      for (String key : AWS_LOG_REGEXP_GROUPS) {
+        try {
+          final String value = matcher.group(key);
+          auditLogMap.put(key, value);
+        } catch (IllegalStateException e) {
+          LOG.debug(String.valueOf(e));
+        }
+      }
+    }
+    return auditLogMap;
+  }
+
+  /**
+   * parseReferrerHeader method helps in parsing the http referrer header,
+   * which is one of the key-value pairs of an audit log.
+   *
+   * @param referrerHeader the http referrer header of a particular audit log
+   * @return a map containing the key-value pairs of the audit log as well as
+   * those of the referrer header present in it
+   */
+  public HashMap<String, String> parseReferrerHeader(String referrerHeader) {
+    HashMap<String, String> referrerHeaderMap = new HashMap<>();
+    if (referrerHeader == null || referrerHeader.length() == 0) {
+      LOG.debug(
+          "This is an empty string or null string, expected a valid string to parse");
+      return referrerHeaderMap;
+    }
+    int indexOfQuestionMark = referrerHeader.indexOf("?");
+    String httpReferrer = referrerHeader.substring(indexOfQuestionMark + 1,
+        referrerHeader.length() - 1);
+    int lengthOfReferrer = httpReferrer.length();
+    int start = 0;
+    while (start < lengthOfReferrer) {
+      int equals = httpReferrer.indexOf("=", start);
+      // no match : break
+      if (equals == -1) {
+        break;
+      }
+      String key = httpReferrer.substring(start, equals);
+      int end = httpReferrer.indexOf("&", equals);
+      // or end of string
+      if (end == -1) {
+        end = lengthOfReferrer;
+      }
+      String value = httpReferrer.substring(equals + 1, end);
+      referrerHeaderMap.put(

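   Since the quoted hunk is cut off above, here is a small stand-alone sketch
(not the PR code) of the key=value extraction that the loop performs on the
referrer header's query string; the sample header and keys are made up:

```java
import java.util.HashMap;

// Stand-alone sketch of splitting a referrer query string into key-value pairs.
public class ReferrerHeaderSketch {
  public static void main(String[] args) {
    String referrerHeader =
        "http://hadoop.apache.org/?op=op_open&p1=test/file.txt&pr=alice";
    HashMap<String, String> map = new HashMap<>();
    String query = referrerHeader.substring(referrerHeader.indexOf("?") + 1);
    for (String pair : query.split("&")) {
      int eq = pair.indexOf("=");
      if (eq > 0) {
        map.put(pair.substring(0, eq), pair.substring(eq + 1));
      }
    }
    // prints the three pairs op=op_open, p1=test/file.txt, pr=alice
    // (HashMap iteration order is not guaranteed)
    System.out.println(map);
  }
}
```
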
[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-17 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r948692091


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/mapreduce/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit.mapreduce;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.audit.AvroDataRecord;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.LineRecordReader;
+
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.AWS_LOG_REGEXP_GROUPS;
+import static org.apache.hadoop.fs.s3a.audit.S3LogParser.LOG_ENTRY_PATTERN;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  public static final int MAX_LINE_LENGTH = 32000;
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * parseAuditLog method helps in parsing the audit log
+   * into key-value pairs using regular expressions.
+   *
+   * @param singleAuditLog a single audit log from the merged audit log file
+   * @return a map (auditLogMap) containing the key-value pairs of a single audit log
+   */
+  public HashMap<String, String> parseAuditLog(String singleAuditLog) {
+    HashMap<String, String> auditLogMap = new HashMap<>();
+    if (singleAuditLog == null || singleAuditLog.length() == 0) {
+      LOG.debug(

Review Comment:
   Modified to log at info level.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-17 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r948691401


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/avro/AvroDataSchema.avsc:
##
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+{
+"type" : "record", "name" : "AvroDataRecord",

Review Comment:
   changed to AvroS3LogEntryRecord
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937944582


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditTool.java:
##
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+import static org.apache.hadoop.fs.s3a.audit.AuditTool.AUDIT;
+
+/**
+ * This will implement tests on AuditTool class.
+ */
+public class TestAuditTool extends AbstractAuditToolTest {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestAuditTool.class);
+
+  /**
+   * Testing run method in AuditTool class by passing source and destination
+   * paths.
+   */
+  @Test
+  public void testRun() throws Exception {
+    Path s3LogsPath = new Path("s3a://sravani-data/logs2");
+    Path s3DestPath = new Path("s3a://sravani-data/summary");
+    run(AUDIT, s3DestPath, s3LogsPath);
+    S3AFileSystem s3AFileSystem =
+        (S3AFileSystem) s3DestPath.getFileSystem(getConfiguration());
+    Path avroFilePath = new Path("s3a://sravani-data/summary/AvroData.avro");
+    assertTrue("Avro file should be present in destination path",
+        s3AFileSystem.exists(avroFilePath));

Review Comment:
   Yes, I loaded the avro data and wrote some assertions to check it.
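
   For illustration, a minimal local-file sketch of that kind of check, using
the Avro reader classes imported by the newer version of this test; the local
`avroFile` handle and the assertion are assumptions (the real test may read the
file from S3 instead):

```java
// Hypothetical sketch of the kind of assertion described above. Requires
// org.apache.avro.file.DataFileReader, org.apache.avro.io.DatumReader and
// org.apache.avro.specific.SpecificDatumReader; AvroDataRecord is the record
// class generated from the PR's schema, and the file path is a placeholder.
File avroFile = new File("AvroData.avro");
DatumReader<AvroDataRecord> datumReader =
    new SpecificDatumReader<>(AvroDataRecord.class);
try (DataFileReader<AvroDataRecord> dataFileReader =
    new DataFileReader<>(avroFile, datumReader)) {
  assertTrue("merged avro file should contain at least one record",
      dataFileReader.hasNext());
}
```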



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937926001


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static
+    org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static
+    org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static
+    org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a command line interface.
+ * Its functionality is to parse the merged audit log file
+ * and generate an avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";

Review Comment:
   Actually removed it, because bindFileSystem() is no longer used.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937910850


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merge all the audit logs present in a directory of.
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  /**
+   * Merge all the audit log files from a directory into single audit log file.
+   * @param auditLogsDirectoryPath path where audit log files are present.
+   * @throws IOException on any failure.
+   */
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {
+    File auditLogFilesDirectory = new File(auditLogsDirectoryPath);
+    String[] auditLogFileNames = auditLogFilesDirectory.list();
+
+    //Merging of audit log files present in a directory into a single audit log file
+    if (auditLogFileNames != null && auditLogFileNames.length != 0) {
+      File auditLogFile = new File("AuditLogFile");

Review Comment:
   Actually removed this part and combined merging, parsing and converting into
avro.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937900727


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merge all the audit logs present in a directory of.
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  /**
+   * Merge all the audit log files from a directory into single audit log file.
+   * @param auditLogsDirectoryPath path where audit log files are present.
+   * @throws IOException on any failure.
+   */
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {
+    File auditLogFilesDirectory = new File(auditLogsDirectoryPath);
+    String[] auditLogFileNames = auditLogFilesDirectory.list();
+
+    //Merging of audit log files present in a directory into a single audit log file
+    if (auditLogFileNames != null && auditLogFileNames.length != 0) {
+      File auditLogFile = new File("AuditLogFile");
+      try (PrintWriter printWriter = new PrintWriter(auditLogFile,
+          "UTF-8")) {
+        for (String singleAuditLogFileName : auditLogFileNames) {
+          File singleAuditLogFile =
+              new File(auditLogFilesDirectory, singleAuditLogFileName);
+          try (BufferedReader bufferedReader =
+              new BufferedReader(
+                  new InputStreamReader(new FileInputStream(singleAuditLogFile),
+                      "UTF-8"))) {
+            String singleLine = bufferedReader.readLine();
+            while (singleLine != null) {
+              printWriter.println(singleLine);

Review Comment:
   yes done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937910850


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merge all the audit logs present in a directory of.
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  /**
+   * Merge all the audit log files from a directory into single audit log file.
+   * @param auditLogsDirectoryPath path where audit log files are present.
+   * @throws IOException on any failure.
+   */
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {
+    File auditLogFilesDirectory = new File(auditLogsDirectoryPath);
+    String[] auditLogFileNames = auditLogFilesDirectory.list();
+
+    //Merging of audit log files present in a directory into a single audit log file
+    if (auditLogFileNames != null && auditLogFileNames.length != 0) {
+      File auditLogFile = new File("AuditLogFile");

Review Comment:
   actually removed this part and combined merging, parsing and converting into 
avro



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937903008


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Different tests on the S3AAuditLogMerger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
+
+  private final S3AAuditLogMerger s3AAuditLogMerger = new S3AAuditLogMerger();
+
+  /**
+   * Sample directories and files to test.
+   */
+  private final File auditLogFile = new File("AuditLogFile");
+  private File file1;
+  private File file2;
+  private File file3;
+  private File dir1;
+  private File dir2;
+
+  /**
+   * Testing the mergeFiles method in the merger class
+   * by passing a sample directory which contains files with some content in it,
+   * and checking that the files in the directory are merged into a single file.
+   */
+  @Test
+  public void testMergeFiles() throws IOException {
+dir1 = Files.createTempDirectory("sampleFilesDirectory").toFile();
+file1 = File.createTempFile("sampleFile1", ".txt", dir1);
+file2 = File.createTempFile("sampleFile2", ".txt", dir1);
+file3 = File.createTempFile("sampleFile3", ".txt", dir1);
+try (FileWriter fw = new FileWriter(file1);
+FileWriter fw1 = new FileWriter(file2);
+FileWriter fw2 = new FileWriter(file3)) {
+  fw.write("abcd");
+  fw1.write("efgh");
+  fw2.write("ijkl");
+}
+s3AAuditLogMerger.mergeFiles(dir1.getPath());
+String str =
+new String(Files.readAllBytes(Paths.get(auditLogFile.getPath())));
+//The contents of each audit log file in the merged audit log file are
+// separated by '\n'.
+// Performing assertions is easier after replacing '\n' with ''
+String fileText = str.replace("\n", "");
+assertTrue("the string 'abcd' should be in the merged file",

Review Comment:
   Actually there is no need for this check, so I removed it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937908465


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a command line interface whose functionality is to parse
+ * the merged audit log file and generate an Avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  public AuditTool() {
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * This run method in AuditTool takes the S3 bucket path which contains
+   * audit log files from the command line arguments and merges the audit
+   * log files present in that path into a single file on the local system.
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code.
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List<String> argv = new ArrayList<>(Arrays.asList(args));
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//Path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+

Review Comment:
   Yes, I passed both the input and output paths as arguments and validated them.
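
   A rough sketch of that argument handling, assuming a usage of `s3audit <audit logs path> <destination path>` (the actual argument order and validation in the PR may differ; the helper class and method names here are hypothetical):

   ```java
   import org.apache.hadoop.fs.Path;

   final class AuditToolArgs {

     private AuditToolArgs() {
     }

     /**
      * Validate the two positional arguments and return them as Paths.
      * Index 0 is the audit log source, index 1 is the destination.
      */
     static Path[] validate(String[] args) {
       if (args.length != 2) {
         throw new IllegalArgumentException(
             "Expected: <audit logs path> <destination path>");
       }
       Path logsPath = new Path(args[0]);
       Path destPath = new Path(args[1]);
       // The source is expected to be an S3A URI; fail fast otherwise.
       if (!"s3a".equals(logsPath.toUri().getScheme())) {
         throw new IllegalArgumentException("Not an S3A path: " + logsPath);
       }
       return new Path[] {logsPath, destPath};
     }
   }
   ```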



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937906506


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  /**
+   * Merge all the audit log files from a directory into a single audit log file.
+   * @param auditLogsDirectoryPath path where audit log files are present.
+   * @throws IOException on any failure.
+   */
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {
+File auditLogFilesDirectory = new File(auditLogsDirectoryPath);
+String[] auditLogFileNames = auditLogFilesDirectory.list();
+
+//Merging of audit log files present in a directory into a single audit log file
+if (auditLogFileNames != null && auditLogFileNames.length != 0) {
+  File auditLogFile = new File("AuditLogFile");
+  try (PrintWriter printWriter = new PrintWriter(auditLogFile,
+  "UTF-8")) {
+for (String singleAuditLogFileName : auditLogFileNames) {
+  File singleAuditLogFile =
+  new File(auditLogFilesDirectory, singleAuditLogFileName);
+  try (BufferedReader bufferedReader =
+  new BufferedReader(
+  new InputStreamReader(new FileInputStream(singleAuditLogFile),
+  "UTF-8"))) {

Review Comment:
   modified
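
   The reply does not say what changed; one plausible cleanup for this block (shown purely as an assumption, with an added `import java.nio.charset.StandardCharsets`) is replacing the `"UTF-8"` string literal with `StandardCharsets.UTF_8`, which also drops the checked `UnsupportedEncodingException`:

   ```java
   // Sketch of the copy loop with StandardCharsets.UTF_8 (assumed change, not
   // necessarily what the PR did); printWriter and singleAuditLogFile are the
   // variables from the surrounding mergeFiles() code quoted above.
   try (BufferedReader bufferedReader = new BufferedReader(
       new InputStreamReader(new FileInputStream(singleAuditLogFile),
           StandardCharsets.UTF_8))) {
     String line;
     while ((line = bufferedReader.readLine()) != null) {
       // Copy every audit log line from this file into the merged file.
       printWriter.println(line);
     }
   }
   ```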



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937906165


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Different tests on the S3AAuditLogMerger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
+
+  private final S3AAuditLogMerger s3AAuditLogMerger = new S3AAuditLogMerger();
+
+  /**
+   * Sample directories and files to test.
+   */
+  private final File auditLogFile = new File("AuditLogFile");
+  private File file1;
+  private File file2;
+  private File file3;
+  private File dir1;
+  private File dir2;
+
+  /**
+   * Testing the mergeFiles method in the merger class
+   * by passing a sample directory which contains files with some content in it,
+   * and checking that the files in the directory are merged into a single file.
+   */
+  @Test
+  public void testMergeFiles() throws IOException {
+dir1 = Files.createTempDirectory("sampleFilesDirectory").toFile();
+file1 = File.createTempFile("sampleFile1", ".txt", dir1);
+file2 = File.createTempFile("sampleFile2", ".txt", dir1);
+file3 = File.createTempFile("sampleFile3", ".txt", dir1);
+try (FileWriter fw = new FileWriter(file1);
+FileWriter fw1 = new FileWriter(file2);
+FileWriter fw2 = new FileWriter(file3)) {
+  fw.write("abcd");
+  fw1.write("efgh");
+  fw2.write("ijkl");
+}
+s3AAuditLogMerger.mergeFiles(dir1.getPath());
+String str =
+new String(Files.readAllBytes(Paths.get(auditLogFile.getPath())));
+//The contents of each audit log file in the merged audit log file are
+// separated by '\n'.
+// Performing assertions is easier after replacing '\n' with ''
+String fileText = str.replace("\n", "");
+assertTrue("the string 'abcd' should be in the merged file",
+fileText.contains("abcd"));
+assertTrue("the string 'efgh' should be in the merged file",
+fileText.contains("efgh"));
+assertTrue("the string 'ijkl' should be in the merged file",
+fileText.contains("ijkl"));
+  }
+
+  /**
+   * Testing the merged file
+   * by passing different directories which contain files with some content
+   * in them, and checking that the file is overwritten by the new file contents.
+   */
+  @Test
+  public void testMergedFile() throws IOException {
+//Testing the merged file with contents of first directory
+dir1 = Files.createTempDirectory("sampleFilesDirectory").toFile();
+file1 = File.createTempFile("sampleFile1", ".txt", dir1);
+file2 = File.createTempFile("sampleFile2", ".txt", dir1);
+try (FileWriter fw = new FileWriter(file1);
+FileWriter fw1 = new FileWriter(file2)) {
+  fw.write("abcd");
+  fw1.write("efgh");
+}
+s3AAuditLogMerger.mergeFiles(dir1.getPath());
+String str =
+new String(Files.readAllBytes(Paths.get(auditLogFile.getPath())));
+//The contents of each audit log file in the merged audit log file are
+// separated by '\n'.
+// Performing assertions is easier after replacing '\n' with ''
+String fileText = str.replace("\n", "");
+assertTrue("the string 'abcd' should be in the merged file",
+fileText.contains("abcd"));
+assertTrue("the string 'efgh' should be in the merged file",
+fileText.contains("efgh"));
+assertFalse("the string 'ijkl' should not be in the merged file",
+fileText.contains("ijkl"));
+
+//Testing the merged file with contents of second directory
+dir2 = Files.createTempDirectory("sampleFilesDirectory1").toFile();
+file3 = File.createTempFile("sampleFile3"

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937897093


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param namename of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param namename of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937900247


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  /**
+   * Merge all the audit log files from a directory into a single audit log file.
+   * @param auditLogsDirectoryPath path where audit log files are present.
+   * @throws IOException on any failure.
+   */
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {
+File auditLogFilesDirectory = new File(auditLogsDirectoryPath);
+String[] auditLogFileNames = auditLogFilesDirectory.list();
+
+//Merging of audit log files present in a directory into a single audit log file
+if (auditLogFileNames != null && auditLogFileNames.length != 0) {
+  File auditLogFile = new File("AuditLogFile");

Review Comment:
   Changed to S3A URLs.
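
   As a rough illustration only, writing the merged output to an S3A URL (rather than a local `java.io.File`) goes through the Hadoop FileSystem API along these lines; the destination path and method names below are made up for the example:

   ```java
   import java.io.IOException;
   import java.nio.charset.StandardCharsets;

   import org.apache.hadoop.conf.Configuration;
   import org.apache.hadoop.fs.FSDataOutputStream;
   import org.apache.hadoop.fs.FileSystem;
   import org.apache.hadoop.fs.Path;

   final class MergedLogWriterSketch {

     private MergedLogWriterSketch() {
     }

     /** Write the merged audit log lines to a destination given as an s3a:// URL. */
     static void writeMergedLog(Configuration conf, String destination,
         Iterable<String> mergedLines) throws IOException {
       // e.g. destination = "s3a://bucket/audit/merged-audit.log" (hypothetical)
       Path destPath = new Path(destination);
       FileSystem fs = destPath.getFileSystem(conf);
       try (FSDataOutputStream out = fs.create(destPath, true)) {
         for (String line : mergedLines) {
           out.write((line + "\n").getBytes(StandardCharsets.UTF_8));
         }
       }
     }
   }
   ```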



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937899698


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a command line interface whose functionality is to parse
+ * the merged audit log file and generate an Avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  public AuditTool() {
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * This run method in AuditTool takes the S3 bucket path which contains
+   * audit log files from the command line arguments and merges the audit
+   * log files present in that path into a single file on the local system.
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code.
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List<String> argv = new ArrayList<>(Arrays.asList(args));
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//Path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));

Review Comment:
   modified as above



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937898572


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a command line interface whose functionality is to parse
+ * the merged audit log file and generate an Avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  public AuditTool() {
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * This run method in AuditTool takes the S3 bucket path which contains
+   * audit log files from the command line arguments and merges the audit
+   * log files present in that path into a single file on the local system.
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code.
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List<String> argv = new ArrayList<>(Arrays.asList(args));
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//Path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//Setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+RemoteIterator<LocatedFileStatus> listOfS3LogFiles =
+s3AFileSystem.listFiles(s3LogsPath, true);
+
+//Merging local audit files into a single file
+File s3aLogsDirectory = new File(s3LogsPath.getName());
+boolean s3aLogsDirectoryCreation = false;
+if (!s3aLogsDirectory.exists()) {
+  s3aLogsDirectoryCreation = s3aLogsDirectory.mkdir();
+}
+if(s3aLogsDirectoryCreation) {
+  while (listOfS3LogFiles.hasNext()) {
+Path s3LogFilePath = listOfS3LogFiles.next().getPath();
+File s3LogLocalFilePath =
+new File(s3aLogsDirectory, s3LogFilePath.getName());
+boolean localFileCreation = s3LogLocalFilePath.createNewFile();
+if (localFileCreation) {
+  FileStatu

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937898011


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a command line interface whose functionality is to parse
+ * the merged audit log file and generate an Avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  public AuditTool() {
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * This run method in AuditTool takes the S3 bucket path which contains
+   * audit log files from the command line arguments and merges the audit
+   * log files present in that path into a single file on the local system.
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code.
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List<String> argv = new ArrayList<>(Arrays.asList(args));
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//Path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//Setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+RemoteIterator<LocatedFileStatus> listOfS3LogFiles =
+s3AFileSystem.listFiles(s3LogsPath, true);
+
+//Merging local audit files into a single file
+File s3aLogsDirectory = new File(s3LogsPath.getName());
+boolean s3aLogsDirectoryCreation = false;
+if (!s3aLogsDirectory.exists()) {

Review Comment:
   Added a test for this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubs

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937895338


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param namename of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param namename of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937712482


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditTool.java:
##
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+import static org.apache.hadoop.fs.s3a.audit.AuditTool.AUDIT;
+
+/**
+ * This will implement tests on AuditTool class.
+ */
+public class TestAuditTool extends AbstractAuditToolTest {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestAuditTool.class);
+
+  /**
+   * Testing run method in AuditTool class by passing source and destination
+   * paths.
+   */
+  @Test
+  public void testRun() throws Exception {
+Path s3LogsPath = new Path("s3a://sravani-data/logs2");

Review Comment:
   Yes, as suggested above I created a sample directory and a sample file containing an audit log entry, and passed the paths using
   `Path path = new Path(sampleFile.toUri())`.
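
   A condensed sketch of that setup as a fragment inside the test (it reuses the test class's existing imports of `File`, `FileWriter`, `Files` and `Path`; `sampleLogEntry` stands for whatever sample audit log string the test has available):

   ```java
   // Illustrative setup only: create a temp directory with one audit log file
   // and derive Hadoop Paths from their URIs, as described above.
   File sampleDir = Files.createTempDirectory("sampleDir").toFile();
   File sampleFile = File.createTempFile("sampleFile", ".txt", sampleDir);
   try (FileWriter fw = new FileWriter(sampleFile)) {
     fw.write(sampleLogEntry);   // a sample audit log entry (assumed available)
     fw.flush();
   }
   Path logsPath = new Path(sampleDir.toURI());   // source directory of logs
   Path filePath = new Path(sampleFile.toURI());  // an individual log file
   ```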



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937709025


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param namename of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param namename of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937707519


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param namename of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param namename of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-08-04 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r937603168


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Tests for the S3AAuditLogMergerAndParser class.
+ */
+public class TestS3AAuditLogMergerAndParser extends AbstractS3ATestBase {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(TestS3AAuditLogMergerAndParser.class);
+
+  /**
+   * A real log entry.
+   * This is derived from a real log entry on a test run.
+   * If this needs to be updated, please do it from a real log.
+   * Splitting this up across lines has a tendency to break things, so
+   * be careful making changes.
+   */
+  private final String sampleLogEntry =
+  "183c9826b45486e485693808f38e2c4071004bf5dfd4c3ab210f0a21a400"
+  + " bucket-london"
+  + " [13/May/2021:11:26:06 +]"
+  + " 109.157.171.174"
+  + " arn:aws:iam::152813717700:user/dev"
+  + " M7ZB7C4RTKXJKTM9"
+  + " REST.PUT.OBJECT"
+  + " fork-0001/test/testParseBrokenCSVFile"
+  + " \"PUT /fork-0001/test/testParseBrokenCSVFile HTTP/1.1\""
+  + " 200"
+  + " -"
+  + " -"
+  + " 794"
+  + " 55"
+  + " 17"
+  + " \"https://audit.example.org/hadoop/1/op_create/";
+  + "e8ede3c7-8506-4a43-8268-fe8fcbb510a4-0278/"
+  + "?op=op_create"
+  + "&p1=fork-0001/test/testParseBrokenCSVFile"
+  + "&pr=alice"
+  + "&ps=2eac5a04-2153-48db-896a-09bc9a2fd132"
+  + "&id=e8ede3c7-8506-4a43-8268-fe8fcbb510a4-0278&t0=154"
+  + "&fs=e8ede3c7-8506-4a43-8268-fe8fcbb510a4&t1=156&"
+  + "ts=1620905165700\""
+  + " \"Hadoop 3.4.0-SNAPSHOT, java/1.8.0_282 vendor/AdoptOpenJDK\""
+  + " -"
+  + " TrIqtEYGWAwvu0h1N9WJKyoqM0TyHUaY+ZZBwP2yNf2qQp1Z/0="
+  + " SigV4"
+  + " ECDHE-RSA-AES128-GCM-SHA256"
+  + " AuthHeader"
+  + " bucket-london.s3.eu-west-2.amazonaws.com"
+  + " TLSv1.2";
+
+  /**
+   * A real referrer header entry.
+   * This is derived from a real log entry on a test run.
+   * If this needs to be updated, please do it from a real log.
+   * Splitting this up across lines has a tendency to break things, so
+   * be careful making changes.
+   */
+  private final String sampleReferrerHeader =
+  "\"https://audit.example.org/hadoop/1/op_create/";
+  + "e8ede3c7-8506-4a43-8268-fe8fcbb510a4-0278/?"
+  + "op=op_create"
+  + "&p1=fork-0001/test/testParseBrokenCSVFile"
+  + "&pr=alice"
+  + "&ps=2eac5a04-2153-48db-896a-09bc9a2fd132"
+  + "&id=e8ede3c7-8506-4a43-8268-fe8fcbb510a4-0278&t0=154"
+  + "&fs=e8ede3c7-8506-4a43-8268-fe8fcbb510a4&t1=156"
+  + "&ts=1620905165700\"";
+
+  private final S3AAuditLogMergerAndParser s3AAuditLogMergerAndParser =
+  new S3AAuditLogMergerAndParser();
+
+  /**
+   * Test the parseAuditLog method of the parser class by passing a sample
+   * audit log entry and checking that it is parsed correctly.
+   */
+  @Test
+  public void testParseAuditLog() {
+Map parseAuditLogResult =
+s3AAuditLogMergerAndParser.parseAuditLog(sampleLogEntry);
+assertNotNull("the result of parseAuditLogResult should be not null",
+parseAuditLogResult);
+//verifying the bucket from parsed audit log
+assertEquals("the expected and actual results should be same",
+"bucket-london", parseAuditLogResult.get("bucket"));
+//verifying the remoteip from parsed audit log
+assertEquals("the expected and actual results should be same",
+"109.157.171.174", parseAuditLogResult.get("

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-16 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r922774936


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**
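To make the merging described in the class javadoc above concrete, here is a rough sketch, assuming plain FileSystem calls are used; the method name mergeInto is made up for illustration and is not from the PR.

import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class MergeSketch {
  /** Copy every file under logDir into a single merged file. */
  static void mergeInto(FileSystem fs, Path logDir, Path merged) throws IOException {
    try (FSDataOutputStream out = fs.create(merged, true)) {
      RemoteIterator<LocatedFileStatus> it = fs.listFiles(logDir, true);
      while (it.hasNext()) {
        try (FSDataInputStream in = fs.open(it.next().getPath())) {
          IOUtils.copy(in, out);   // append this log file's bytes
          out.write('\n');         // keep the last entry line-terminated
        }
      }
    }
  }
}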

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-15 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r922034066


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**
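Given the Avro imports in the hunk above, this generic sketch shows how parsed entries could end up in an Avro container file; the inline schema and the output file name are illustrative stand-ins, not the PR's generated record class.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public final class AvroWriteSketch {
  public static void main(String[] args) throws IOException {
    // Tiny stand-in schema holding two of the parsed fields.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"AuditEntry\",\"fields\":["
            + "{\"name\":\"bucket\",\"type\":\"string\"},"
            + "{\"name\":\"verb\",\"type\":\"string\"}]}");
    GenericRecord record = new GenericData.Record(schema);
    record.put("bucket", "bucket-london");
    record.put("verb", "REST.PUT.OBJECT");
    try (DataFileWriter<GenericRecord> writer =
             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      writer.create(schema, new File("audit-sample.avro"));
      writer.append(record);
    }
  }
}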

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-15 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r921968636


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**
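A side note on the DATETIME field above: a bracketed access-log timestamp such as 13/May/2021:11:26:06 with a numeric zone offset can be handled with java.time; the format pattern and offset value below are assumptions for illustration, not taken from the PR.

import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Locale;

public final class TimestampSketch {
  public static void main(String[] args) {
    DateTimeFormatter fmt =
        DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss Z", Locale.ENGLISH);
    OffsetDateTime ts = OffsetDateTime.parse("13/May/2021:11:26:06 +0000", fmt);
    System.out.println(ts.toEpochSecond());  // seconds since the epoch
  }
}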

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-15 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r921968917


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-13 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r920814131


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-13 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r920812601


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditToolTest.java:
##
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.s3guard.S3GuardToolTestHelper.runS3GuardCommand;
+
+/**
+ * An extension of the contract test base set up for S3A tests.
+ */
+public class AbstractAuditToolTest extends AbstractS3ATestBase {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(AbstractAuditToolTest.class);
+
+  /**
+   * Take a configuration, copy it and disable FS Caching on
+   * the new one.
+   *
+   * @param conf source config
+   * @return a new, patched, config
+   */
+  protected Configuration uncachedFSConfig(final Configuration conf) {

Review Comment:
   removed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-13 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r920794339


##
hadoop-tools/hadoop-aws/pom.xml:
##
@@ -438,6 +438,23 @@
   
 
   
+  
+org.apache.avro
+avro-maven-plugin
+
+  
+generate-avro-sources
+generate-sources
+
+  schema
+
+  
+
+
+  
../../hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/avro

Review Comment:
   changed to src/main



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-13 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r920793828


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-13 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r920791859


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-13 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r920187510


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-07-13 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r920186659


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMergerAndParser.java:
##
@@ -0,0 +1,546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+/**
+ * Merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMergerAndParser {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMergerAndParser.class);
+
+  /**
+   * Simple entry: anything up to a space.
+   * {@value}.
+   */
+  private static final String SIMPLE = "[^ ]*";
+
+  /**
+   * Date/Time. Everything within square braces.
+   * {@value}.
+   */
+  private static final String DATETIME = "\\[(.*?)\\]";
+
+  /**
+   * A natural number or "-".
+   * {@value}.
+   */
+  private static final String NUMBER = "(-|[0-9]*)";
+
+  /**
+   * A Quoted field or "-".
+   * {@value}.
+   */
+  private static final String QUOTED = "(-|\"[^\"]*\")";
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String e(String name, String pattern) {
+return String.format("(?<%s>%s) ", name, pattern);
+  }
+
+  /**
+   * An entry in the regexp.
+   *
+   * @param name name of the group
+   * @param pattern pattern to use in the regexp
+   * @return the pattern for the regexp
+   */
+  private static String eNoTrailing(String name, String pattern) {
+return String.format("(?<%s>%s)", name, pattern);
+  }
+
+  /**
+   * Simple entry using the {@link #SIMPLE} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String e(String name) {
+return e(name, SIMPLE);
+  }
+
+  /**
+   * Quoted entry using the {@link #QUOTED} pattern.
+   *
+   * @param name name of the element (for code clarity only)
+   * @return the pattern for the regexp
+   */
+  private static String q(String name) {
+return e(name, QUOTED);
+  }
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String OWNER_GROUP = "owner";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String BUCKET_GROUP = "bucket";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String TIMESTAMP_GROUP = "timestamp";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REMOTEIP_GROUP = "remoteip";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTER_GROUP = "requester";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTID_GROUP = "requestid";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String VERB_GROUP = "verb";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String KEY_GROUP = "key";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String REQUESTURI_GROUP = "requesturi";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String HTTP_GROUP = "http";
+
+  /**
+   * Log group {@value}.
+   */
+  public static final String AWSERRORCODE_GROUP = "awserrorcode";
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-23 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r904912305


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a Command Line Interface.
+ * i.e., its functionality is to parse the merged audit log file
+ * and generate an Avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {

Review Comment:
   done
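Since AuditTool extends Configured and implements Tool, the conventional entry point is ToolRunner; a minimal sketch, assuming a no-arg constructor and leaving the exact argument order to the tool's own usage text:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.audit.AuditTool;
import org.apache.hadoop.util.ToolRunner;

public final class AuditToolLauncher {
  public static void main(String[] args) throws Exception {
    // args name the audit-log source and the destination of the merged/parsed
    // output; see the tool's usage message for the exact order.
    int exitCode = ToolRunner.run(new Configuration(), new AuditTool(), args);
    System.exit(exitCode);
  }
}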



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-22 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r904609878


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merge all the audit logs present in a directory of.
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  /**
+   * Merge all the audit log files from a directory into single audit log file.
+   * @param auditLogsDirectoryPath path where audit log files are present.
+   * @throws IOException on any failure.
+   */
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {
+File auditLogFilesDirectory = new File(auditLogsDirectoryPath);
+String[] auditLogFileNames = auditLogFilesDirectory.list();
+
+//Merging of audit log files present in a directory into a single audit 
log file
+if (auditLogFileNames != null && auditLogFileNames.length != 0) {
+  File auditLogFile = new File("AuditLogFile");

Review Comment:
   Do you mean listing the files in the S3 path and iterating over them?
   Could you please elaborate on that?
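   If the suggestion is something along those lines, here is a rough sketch of what I have in mind: list the objects under the S3 path with `listFiles()` and stream each one straight into a single merged local file. The bucket URI `s3a://BUCKET/logs` and the class name are placeholders, not the actual patch code.

```java
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class MergeFromS3Sketch {
  public static void main(String[] args) throws Exception {
    Path logsPath = new Path("s3a://BUCKET/logs");        // hypothetical audit log path
    FileSystem fs = logsPath.getFileSystem(new Configuration());
    File merged = new File("AuditLogFile");               // local merged output file
    try (PrintWriter out = new PrintWriter(new FileWriter(merged))) {
      // list every log object under the path and stream it directly into the
      // merged file, so nothing has to be copied to local disk first
      RemoteIterator<LocatedFileStatus> it = fs.listFiles(logsPath, true);
      while (it.hasNext()) {
        Path logFile = it.next().getPath();
        try (FSDataInputStream in = fs.open(logFile);
             BufferedReader reader = new BufferedReader(
                 new InputStreamReader(in, StandardCharsets.UTF_8))) {
          String line;
          while ((line = reader.readLine()) != null) {
            out.println(line);
          }
        }
      }
    }
  }
}
```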
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-22 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r903633874


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * MergerTest will implement different tests on Merger class methods.
+ */
+public class TestS3AAuditLogMerger {

Review Comment:
   extended AbstractHadoopTestBase.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-22 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r903630927


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a Command Line Interface.
+ * i.e, it's functionality is to parse the merged audit log file.
+ * and generate avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  public AuditTool() {
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * This run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments.
+   * and merge the audit log files present in that path into single file in.
+   * local system.
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code.
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//Path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//Setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+RemoteIterator listOfS3LogFiles =
+s3AFileSystem.listFiles(s3LogsPath, true);
+
+//Merging local audit files into a single file
+File s3aLogsDirectory = new File(s3LogsPath.getName());
+boolean s3aLogsDirectoryCreation = false;
+if (!s3aLogsDirectory.exists()) {
+  s3aLogsDirectoryCreation = s3aLogsDirectory.mkdir();
+}
+if(s3aLogsDirectoryCreation) {
+  while (listOfS3LogFiles.hasNext()) {
+Path s3LogFilePath = listOfS3LogFiles.next().getPath();
+File s3LogLocalFilePath =

Review Comment:
   modified file name to s3LogLocalFile



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-22 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r903629695


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a Command Line Interface.
+ * i.e, it's functionality is to parse the merged audit log file.
+ * and generate avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  public AuditTool() {
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * This run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments.
+   * and merge the audit log files present in that path into single file in.
+   * local system.
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code.
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//Path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//Setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+RemoteIterator listOfS3LogFiles =
+s3AFileSystem.listFiles(s3LogsPath, true);
+
+//Merging local audit files into a single file
+File s3aLogsDirectory = new File(s3LogsPath.getName());
+boolean s3aLogsDirectoryCreation = false;
+if (!s3aLogsDirectory.exists()) {
+  s3aLogsDirectoryCreation = s3aLogsDirectory.mkdir();
+}
+if(s3aLogsDirectoryCreation) {

Review Comment:
   added space



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-22 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r903626816


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a Command Line Interface.
+ * i.e, it's functionality is to parse the merged audit log file.
+ * and generate avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  public AuditTool() {
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * This run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments.
+   * and merge the audit log files present in that path into single file in.
+   * local system.
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code.
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//Path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//Setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));

Review Comment:
   modified to ```FileSystem.get(fsURI, getConf())```
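   For context, a minimal sketch of the binding after this change, with a hypothetical bucket URI. `FileSystem.get()` returns the shared, cached instance for that URI and configuration, whereas `newInstance()` creates a fresh instance the caller has to close itself.

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public final class BindFsSketch {
  public static FileSystem bind(String bucket, Configuration conf) throws Exception {
    // e.g. bucket = "s3a://BUCKET" (placeholder); the cached instance is returned,
    // so repeated calls with the same URI and configuration share one client
    URI fsURI = new URI(bucket);
    return FileSystem.get(fsURI, conf);
  }
}
```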



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-22 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r903623994


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**
+ * AuditTool is a Command Line Interface.
+ * i.e, it's functionality is to parse the merged audit log file.
+ * and generate avro file.
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  public AuditTool() {
+  }
+
+  /**
+   * Tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * This run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments.
+   * and merge the audit log files present in that path into single file in.
+   * local system.
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code.
+   * @throws Exception on any failure.
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//Path of audit log files in s3 bucket

Review Comment:
   added space
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-21 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r902335968


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merger class will merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private final Logger logger =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {
+File auditLogFilesDirectory = new File(auditLogsDirectoryPath);
+String[] auditLogFileNames = auditLogFilesDirectory.list();
+
+//Read each audit log file present in directory and writes each and every 
audit log in it
+//into a single audit log file
+if (auditLogFileNames != null && auditLogFileNames.length != 0) {
+  File auditLogFile = new File("AuditLogFile");

Review Comment:
   If the file already exists, its existing contents will be overwritten with the merged data.
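   A tiny standalone sketch of that overwrite behaviour, reusing the hard-coded "AuditLogFile" name purely as an illustration of the java.io semantics, not the merger code itself:

```java
import java.io.File;
import java.io.PrintWriter;
import java.nio.file.Files;

public final class OverwriteSketch {
  public static void main(String[] args) throws Exception {
    File auditLogFile = new File("AuditLogFile");
    try (PrintWriter pw = new PrintWriter(auditLogFile)) {   // opens in truncate mode
      pw.println("first run");
    }
    try (PrintWriter pw = new PrintWriter(auditLogFile)) {   // reopening replaces the old contents
      pw.println("second run");
    }
    // prints only "second run": the earlier contents were overwritten, not appended to
    System.out.print(new String(Files.readAllBytes(auditLogFile.toPath())));
  }
}
```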



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-21 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r902335062


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**.
+ * AuditTool is a Command Line Interface to manage S3 Auditing.
+ * i.e, it is a functionality which directly takes s3 path of audit log files
+ * and merge all those into single audit log file
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  private final File s3aLogsDirectory = new File("S3AAuditLogsDirectory");
+
+  public AuditTool() {
+  }
+
+  /**
+   * tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * this run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments
+   * and merge the audit log files present in that path into single file in 
local system
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code
+   * @throws Exception on any failure
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+println("argv: %s", argv);
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+RemoteIterator listOfS3LogFiles =
+s3AFileSystem.listFiles(s3LogsPath, true);
+
+//creating local audit log files directory and
+//copying audit log files into local files from s3 bucket
+//so that it will be easy for us to implement merging and parsing classes
+if (!s3aLogsDirectory.exists()) {
+  boolean s3aLogsDirectoryCreation = s3aLogsDirectory.mkdir();
+}
+File s3aLogsSubDir = new File(s3aLogsDirectory, s3LogsPath.getName());
+boolean s3aLogsSubDirCreation = false;
+if (!s3aLogsSubDir.exists()) {
+  s3aLogsSub

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r902171219


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**.
+ * AuditTool is a Command Line Interface to manage S3 Auditing.
+ * i.e, it is a functionality which directly takes s3 path of audit log files
+ * and merge all those into single audit log file

Review Comment:
   modified javadocs



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r902171306


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**.
+ * AuditTool is a Command Line Interface to manage S3 Auditing.
+ * i.e, it is a functionality which directly takes s3 path of audit log files
+ * and merge all those into single audit log file
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  private final File s3aLogsDirectory = new File("S3AAuditLogsDirectory");
+
+  public AuditTool() {
+  }
+
+  /**
+   * tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * this run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments
+   * and merge the audit log files present in that path into single file in 
local system
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code
+   * @throws Exception on any failure
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+println("argv: {}" , argv);

Review Comment:
   removed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901861689


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static 
org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**.
+ * AuditTool is a Command Line Interface to manage S3 Auditing.
+ * i.e, it is a functionality which directly takes s3 path of audit log files
+ * and merge all those into single audit log file
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  private final File s3aLogsDirectory = new File("S3AAuditLogsDirectory");
+
+  public AuditTool() {
+  }
+
+  /**
+   * tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * this run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments
+   * and merge the audit log files present in that path into single file in 
local system
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code
+   * @throws Exception on any failure
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+println("argv: %s", argv);
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+RemoteIterator listOfS3LogFiles =
+s3AFileSystem.listFiles(s3LogsPath, true);
+
+//creating local audit log files directory and
+//copying audit log files into local files from s3 bucket
+//so that it will be easy for us to implement merging and parsing classes
+if (!s3aLogsDirectory.exists()) {
+  boolean s3aLogsDirectoryCreation = s3aLogsDirectory.mkdir();
+}
+File s3aLogsSubDir = new File(s3aLogsDirectory, s3LogsPath.getName());

Review Comment:
   Actually there is no need for a sub-dir, since we are cleaning up the local files. I'll modify it.
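   Roughly what the modified flow would look like, as a sketch only: each object is copied into one flat local directory which is deleted once merging is done. Names such as `localLogsDir` and the directory name are illustrative, not the final code.

```java
import java.io.File;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class CopyAndCleanupSketch {
  public static void copyLogs(Path s3LogsPath, Configuration conf) throws Exception {
    FileSystem fs = s3LogsPath.getFileSystem(conf);
    File localLogsDir = new File("S3AAuditLogsDirectory");   // single flat local directory
    RemoteIterator<LocatedFileStatus> logs = fs.listFiles(s3LogsPath, true);
    try {
      while (logs.hasNext()) {
        Path logFile = logs.next().getPath();
        // copy each S3 object straight into the local directory (parent dirs are created)
        FileUtils.copyInputStreamToFile(fs.open(logFile),
            new File(localLogsDir, logFile.getName()));
      }
      // ... merge and parse the local files here ...
    } finally {
      // remove the local copies once the tool is done with them
      FileUtils.deleteDirectory(localLogsDir);
    }
  }
}
```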

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901855926


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * MergerTest will implement different tests on Merger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private final Logger logger = 
LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
+
+  private final S3AAuditLogMerger s3AAuditLogMerger = new S3AAuditLogMerger();
+
+  /**
+   * sample directories and files to test.
+   */
+  private final File auditLogFile = new File("AuditLogFile");
+  private final File sampleDirectory = new File("sampleFilesDirectory");
+  private final File emptyDirectory = new File("emptyFilesDirectory");
+  private final File firstSampleFile =
+  new File("sampleFilesDirectory", "sampleFile1.txt");
+  private final File secondSampleFile =
+  new File("sampleFilesDirectory", "sampleFile2.txt");
+  private final File thirdSampleFile =
+  new File("sampleFilesDirectory", "sampleFile3.txt");

Review Comment:
   Yeah, I'll try it this way then.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901850160


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * MergerTest will implement different tests on Merger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private final Logger logger = 
LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
+
+  private final S3AAuditLogMerger s3AAuditLogMerger = new S3AAuditLogMerger();
+
+  /**
+   * sample directories and files to test.
+   */
+  private final File auditLogFile = new File("AuditLogFile");
+  private final File sampleDirectory = new File("sampleFilesDirectory");
+  private final File emptyDirectory = new File("emptyFilesDirectory");
+  private final File firstSampleFile =
+  new File("sampleFilesDirectory", "sampleFile1.txt");
+  private final File secondSampleFile =
+  new File("sampleFilesDirectory", "sampleFile2.txt");
+  private final File thirdSampleFile =
+  new File("sampleFilesDirectory", "sampleFile3.txt");
+
+  /**
+   * creates the sample directories and files before each test.
+   *
+   * @throws IOException on failure
+   */
+  @Before
+  public void setUp() throws IOException {
+boolean sampleDirCreation = sampleDirectory.mkdir();
+boolean emptyDirCreation = emptyDirectory.mkdir();
+if (sampleDirCreation && emptyDirCreation) {
+  try (FileWriter fw = new FileWriter(firstSampleFile);
+  FileWriter fw1 = new FileWriter(secondSampleFile);
+  FileWriter fw2 = new FileWriter(thirdSampleFile)) {
+fw.write("abcd");
+fw1.write("efgh");
+fw2.write("ijkl");
+  }
+}
+  }
+
+  /**
+   * mergeFilesTest() will test the mergeFiles() method in Merger class.
+   * by passing a sample directory which contains files with some content in it
+   * and checks if files in a directory are merged into single file
+   *
+   * @throws IOException on any failure
+   */
+  @Test
+  public void mergeFilesTest() throws IOException {
+s3AAuditLogMerger.mergeFiles(sampleDirectory.getPath());
+String str =
+new String(Files.readAllBytes(Paths.get(auditLogFile.getPath(;
+String fileText = str.replace("\n", "");

Review Comment:
   added comment



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901846695


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * MergerTest will implement different tests on Merger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private final Logger logger = 
LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
+
+  private final S3AAuditLogMerger s3AAuditLogMerger = new S3AAuditLogMerger();
+
+  /**
+   * sample directories and files to test.
+   */
+  private final File auditLogFile = new File("AuditLogFile");
+  private final File sampleDirectory = new File("sampleFilesDirectory");
+  private final File emptyDirectory = new File("emptyFilesDirectory");
+  private final File firstSampleFile =
+  new File("sampleFilesDirectory", "sampleFile1.txt");
+  private final File secondSampleFile =
+  new File("sampleFilesDirectory", "sampleFile2.txt");
+  private final File thirdSampleFile =
+  new File("sampleFilesDirectory", "sampleFile3.txt");
+
+  /**
+   * creates the sample directories and files before each test.
+   *
+   * @throws IOException on failure
+   */
+  @Before
+  public void setUp() throws IOException {
+boolean sampleDirCreation = sampleDirectory.mkdir();
+boolean emptyDirCreation = emptyDirectory.mkdir();
+if (sampleDirCreation && emptyDirCreation) {
+  try (FileWriter fw = new FileWriter(firstSampleFile);
+  FileWriter fw1 = new FileWriter(secondSampleFile);
+  FileWriter fw2 = new FileWriter(thirdSampleFile)) {
+fw.write("abcd");
+fw1.write("efgh");
+fw2.write("ijkl");
+  }
+}
+  }
+
+  /**
+   * mergeFilesTest() will test the mergeFiles() method in Merger class.
+   * by passing a sample directory which contains files with some content in it
+   * and checks if files in a directory are merged into single file
+   *
+   * @throws IOException on any failure
+   */
+  @Test
+  public void mergeFilesTest() throws IOException {
+s3AAuditLogMerger.mergeFiles(sampleDirectory.getPath());
+String str =
+new String(Files.readAllBytes(Paths.get(auditLogFile.getPath(;
+String fileText = str.replace("\n", "");
+assertTrue("the string 'abcd' should be in the merged file",
+fileText.contains("abcd"));
+assertTrue("the string 'efgh' should be in the merged file",
+fileText.contains("efgh"));
+assertTrue("the string 'ijkl' should be in the merged file",
+fileText.contains("ijkl"));
+  }
+
+  /**
+   * mergeFilesTestEmpty() will test the mergeFiles().
+   * by passing an empty directory and checks if merged file is created or not
+   *
+   * @throws IOException on any failure
+   */
+  @Test
+  public void mergeFilesTestEmpty() throws IOException {

Review Comment:
   changed name of the test



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901846062


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * MergerTest will implement different tests on Merger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private final Logger logger =
+      LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
+
+  private final S3AAuditLogMerger s3AAuditLogMerger = new S3AAuditLogMerger();
+
+  /**
+   * sample directories and files to test.
+   */
+  private final File auditLogFile = new File("AuditLogFile");
+  private final File sampleDirectory = new File("sampleFilesDirectory");
+  private final File emptyDirectory = new File("emptyFilesDirectory");
+  private final File firstSampleFile =
+  new File("sampleFilesDirectory", "sampleFile1.txt");
+  private final File secondSampleFile =
+  new File("sampleFilesDirectory", "sampleFile2.txt");
+  private final File thirdSampleFile =
+  new File("sampleFilesDirectory", "sampleFile3.txt");
+
+  /**
+   * creates the sample directories and files before each test.
+   *
+   * @throws IOException on failure
+   */
+  @Before
+  public void setUp() throws IOException {
+boolean sampleDirCreation = sampleDirectory.mkdir();
+boolean emptyDirCreation = emptyDirectory.mkdir();
+if (sampleDirCreation && emptyDirCreation) {
+  try (FileWriter fw = new FileWriter(firstSampleFile);
+  FileWriter fw1 = new FileWriter(secondSampleFile);
+  FileWriter fw2 = new FileWriter(thirdSampleFile)) {
+fw.write("abcd");
+fw1.write("efgh");
+fw2.write("ijkl");
+  }
+}
+  }
+
+  /**
+   * mergeFilesTest() will test the mergeFiles() method in Merger class.
+   * by passing a sample directory which contains files with some content in it
+   * and checks if files in a directory are merged into single file
+   *
+   * @throws IOException on any failure
+   */
+  @Test
+  public void mergeFilesTest() throws IOException {
+s3AAuditLogMerger.mergeFiles(sampleDirectory.getPath());
+String str =
+new String(Files.readAllBytes(Paths.get(auditLogFile.getPath(;
+String fileText = str.replace("\n", "");
+assertTrue("the string 'abcd' should be in the merged file",
+fileText.contains("abcd"));
+assertTrue("the string 'efgh' should be in the merged file",
+fileText.contains("efgh"));
+assertTrue("the string 'ijkl' should be in the merged file",
+fileText.contains("ijkl"));
+  }
+
+  /**
+   * mergeFilesTestEmpty() will test the mergeFiles().

Review Comment:
   removed test name
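   For example, the javadoc can describe the behaviour without repeating the method name, along these lines (wording is a guess at the updated text):
   ```java
   /**
    * Tests mergeFiles() with an empty directory and verifies
    * that no merged audit log file is created.
    */
   ```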



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901828806


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * MergerTest will implement different tests on Merger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private final Logger logger =
+      LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
+
+  private final S3AAuditLogMerger s3AAuditLogMerger = new S3AAuditLogMerger();
+
+  /**
+   * sample directories and files to test.
+   */
+  private final File auditLogFile = new File("AuditLogFile");
+  private final File sampleDirectory = new File("sampleFilesDirectory");
+  private final File emptyDirectory = new File("emptyFilesDirectory");
+  private final File firstSampleFile =
+  new File("sampleFilesDirectory", "sampleFile1.txt");
+  private final File secondSampleFile =
+  new File("sampleFilesDirectory", "sampleFile2.txt");
+  private final File thirdSampleFile =
+  new File("sampleFilesDirectory", "sampleFile3.txt");
+
+  /**
+   * creates the sample directories and files before each test.
+   *
+   * @throws IOException on failure
+   */
+  @Before
+  public void setUp() throws IOException {
+boolean sampleDirCreation = sampleDirectory.mkdir();
+boolean emptyDirCreation = emptyDirectory.mkdir();
+if (sampleDirCreation && emptyDirCreation) {
+  try (FileWriter fw = new FileWriter(firstSampleFile);
+  FileWriter fw1 = new FileWriter(secondSampleFile);
+  FileWriter fw2 = new FileWriter(thirdSampleFile)) {
+fw.write("abcd");
+fw1.write("efgh");
+fw2.write("ijkl");
+  }
+}
+  }
+
+  /**
+   * mergeFilesTest() will test the mergeFiles() method in Merger class.
+   * by passing a sample directory which contains files with some content in it
+   * and checks if files in a directory are merged into single file
+   *
+   * @throws IOException on any failure

Review Comment:
   removed @throws
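   i.e. the description is kept and the `@throws` tag is dropped; a possible cleaned-up form:
   ```java
   /**
    * Tests that mergeFiles() merges every file in the sample directory
    * into a single audit log file containing all of their contents.
    */
   ```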



##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * MergerTest will implement different tests on Merger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private final Logger logger =
+      LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
+
+  private final S3AAuditLogMerger s3AAuditLogMerger = new S3AAuditLogMerger();
+
+  /**

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901827942


##
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestS3AAuditLogMerger.java:
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * MergerTest will implement different tests on Merger class methods.
+ */
+public class TestS3AAuditLogMerger {
+
+  private final Logger logger =
+      LoggerFactory.getLogger(TestS3AAuditLogMerger.class);

Review Comment:
   done
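   Assuming the request was to follow the usual Hadoop convention of a static `LOG` constant, the field would become something like:
   ```java
   private static final Logger LOG =
       LoggerFactory.getLogger(TestS3AAuditLogMerger.class);
   ```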



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901825553


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**.
+ * AuditTool is a Command Line Interface to manage S3 Auditing.
+ * i.e, it is a functionality which directly takes s3 path of audit log files
+ * and merge all those into single audit log file
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  private final File s3aLogsDirectory = new File("S3AAuditLogsDirectory");
+
+  public AuditTool() {
+  }
+
+  /**
+   * tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * this run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments
+   * and merge the audit log files present in that path into single file in local system
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code
+   * @throws Exception on any failure
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+println("argv: %s", argv);
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+RemoteIterator listOfS3LogFiles =
+s3AFileSystem.listFiles(s3LogsPath, true);
+
+//creating local audit log files directory and
+//copying audit log files into local files from s3 bucket
+//so that it will be easy for us to implement merging and parsing classes
+if (!s3aLogsDirectory.exists()) {
+  boolean s3aLogsDirectoryCreation = s3aLogsDirectory.mkdir();
+}
+File s3aLogsSubDir = new File(s3aLogsDirectory, s3LogsPath.getName());
+boolean s3aLogsSubDirCreation = false;
+if (!s3aLogsSubDir.exists()) {
+  s3aLogsSub

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901821972


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**.
+ * AuditTool is a Command Line Interface to manage S3 Auditing.
+ * i.e, it is a functionality which directly takes s3 path of audit log files
+ * and merge all those into single audit log file
+ */
+public class AuditTool extends Configured implements Tool, Closeable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
+
+  private final String entryPoint = "s3audit";
+
+  private PrintWriter out;
+
+  // Exit codes
+  private static final int SUCCESS = EXIT_SUCCESS;
+  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
+
+  /**
+   * Error String when the wrong FS is used for binding: {@value}.
+   **/
+  @VisibleForTesting
+  public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
+
+  private final String usage = entryPoint + "  s3a://BUCKET\n";
+
+  private final File s3aLogsDirectory = new File("S3AAuditLogsDirectory");
+
+  public AuditTool() {
+  }
+
+  /**
+   * tells us the usage of the AuditTool by commands.
+   *
+   * @return the string USAGE
+   */
+  public String getUsage() {
+return usage;
+  }
+
+  /**
+   * this run method in AuditTool takes S3 bucket path.
+   * which contains audit log files from command line arguments
+   * and merge the audit log files present in that path into single file in local system
+   *
+   * @param args command specific arguments.
+   * @return SUCCESS i.e, '0', which is an exit code
+   * @throws Exception on any failure
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+List argv = new ArrayList<>(Arrays.asList(args));
+println("argv: %s", argv);
+if (argv.isEmpty()) {
+  errorln(getUsage());
+  throw invalidArgs("No bucket specified");
+}
+//path of audit log files in s3 bucket
+Path s3LogsPath = new Path(argv.get(0));
+
+//setting the file system
+URI fsURI = toUri(String.valueOf(s3LogsPath));
+S3AFileSystem s3AFileSystem =
+bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+RemoteIterator listOfS3LogFiles =
+s3AFileSystem.listFiles(s3LogsPath, true);
+
+//creating local audit log files directory and
+//copying audit log files into local files from s3 bucket
+//so that it will be easy for us to implement merging and parsing classes

Review Comment:
   done
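   For context, a minimal sketch of the copy step being discussed, reusing the names from the surrounding diff (`s3AFileSystem`, `s3LogsPath`, `s3aLogsSubDir`); this is illustrative, not the exact code in the PR:
   ```java
   // copy each audit log file from the S3 bucket into the local sub-directory
   RemoteIterator<LocatedFileStatus> logFiles =
       s3AFileSystem.listFiles(s3LogsPath, true);
   while (logFiles.hasNext()) {
     LocatedFileStatus status = logFiles.next();
     File localFile = new File(s3aLogsSubDir, status.getPath().getName());
     try (FSDataInputStream in = s3AFileSystem.open(status.getPath())) {
       FileUtils.copyInputStreamToFile(in, localFile);
     }
   }
   ```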



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this 

[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901821623


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merger class will merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private final Logger logger =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {
+File auditLogFilesDirectory = new File(auditLogsDirectoryPath);
+String[] auditLogFileNames = auditLogFilesDirectory.list();
+
+//Read each audit log file present in directory and writes each and every audit log in it
+//into a single audit log file

Review Comment:
   changed comment



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901819850


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merger class will merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private final Logger logger =
+  LoggerFactory.getLogger(S3AAuditLogMerger.class);
+
+  public void mergeFiles(String auditLogsDirectoryPath) throws IOException {

Review Comment:
   added javadocs
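   Roughly, a javadoc of this shape would cover the method (wording here is a guess at what was added):
   ```java
   /**
    * Merge all the audit log files under the given directory into a
    * single audit log file.
    *
    * @param auditLogsDirectoryPath path of the directory containing audit log files
    * @throws IOException on any failure while reading or writing the logs
    */
   ```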



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901792603


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditLogMerger.java:
##
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Merger class will merge all the audit logs present in a directory of
+ * multiple audit log files into a single audit log file.
+ */
+public class S3AAuditLogMerger {
+
+  private final Logger logger =

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] sravanigadey commented on a diff in pull request #4383: HADOOP-18258. Merging of S3A Audit Logs

2022-06-20 Thread GitBox


sravanigadey commented on code in PR #4383:
URL: https://github.com/apache/hadoop/pull/4383#discussion_r901544959


##
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java:
##
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.audit;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE;
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
+
+/**.

Review Comment:
   removed
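   Since `AuditTool` implements `Tool`, here is a minimal sketch of how it can be driven programmatically; the bucket path is a placeholder and the actual CLI wiring is not shown in this diff:
   ```java
   // run the audit tool against an S3 path of audit logs and exit with its code
   Configuration conf = new Configuration();
   int exitCode = ToolRunner.run(conf, new AuditTool(),
       new String[] {"s3a://my-bucket/audit-logs/"});
   ExitUtil.terminate(exitCode);
   ```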



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org