[ https://issues.apache.org/jira/browse/HADOOP-18258?focusedWorklogId=783821&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-783821 ]
ASF GitHub Bot logged work on HADOOP-18258: ------------------------------------------- Author: ASF GitHub Bot Created on: 22/Jun/22 11:38 Start Date: 22/Jun/22 11:38 Worklog Time Spent: 10m Work Description: sravanigadey commented on code in PR #4383: URL: https://github.com/apache/hadoop/pull/4383#discussion_r903630927 ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java: ########## @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.audit; + +import java.io.Closeable; +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FilterFileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR; +import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE; +import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS; + +/** + * AuditTool is a Command Line Interface. + * i.e., its functionality is to parse the merged audit log file + * and generate an Avro file. + */ +public class AuditTool extends Configured implements Tool, Closeable { + + private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class); + + private final String entryPoint = "s3audit"; + + private PrintWriter out; + + // Exit codes + private static final int SUCCESS = EXIT_SUCCESS; + private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR; + + /** + * Error String when the wrong FS is used for binding: {@value}.
+ **/ + @VisibleForTesting + public static final String WRONG_FILESYSTEM = "Wrong filesystem for "; + + private final String usage = entryPoint + " s3a://BUCKET\n"; + + public AuditTool() { + } + + /** + * Tells us the usage of the AuditTool by commands. + * + * @return the string USAGE + */ + public String getUsage() { + return usage; + } + + /** + * This run method in AuditTool takes the S3 bucket path + * which contains audit log files from command line arguments + * and merges the audit log files present in that path into a single file in the + * local system. + * + * @param args command specific arguments. + * @return SUCCESS i.e., '0', which is an exit code. + * @throws Exception on any failure. + */ + @Override + public int run(String[] args) throws Exception { + List<String> argv = new ArrayList<>(Arrays.asList(args)); + if (argv.isEmpty()) { + errorln(getUsage()); + throw invalidArgs("No bucket specified"); + } + //Path of audit log files in s3 bucket + Path s3LogsPath = new Path(argv.get(0)); + + //Setting the file system + URI fsURI = toUri(String.valueOf(s3LogsPath)); + S3AFileSystem s3AFileSystem = + bindFilesystem(FileSystem.newInstance(fsURI, getConf())); + RemoteIterator<LocatedFileStatus> listOfS3LogFiles = + s3AFileSystem.listFiles(s3LogsPath, true); + + //Merging local audit files into a single file + File s3aLogsDirectory = new File(s3LogsPath.getName()); + boolean s3aLogsDirectoryCreation = false; + if (!s3aLogsDirectory.exists()) { + s3aLogsDirectoryCreation = s3aLogsDirectory.mkdir(); + } + if(s3aLogsDirectoryCreation) { + while (listOfS3LogFiles.hasNext()) { + Path s3LogFilePath = listOfS3LogFiles.next().getPath(); + File s3LogLocalFilePath = Review Comment: modified file name to s3LogLocalFile Issue Time Tracking ------------------- Worklog Id: (was: 783821) Time Spent: 6h 10m (was: 6h) > Merging of S3A Audit Logs > ------------------------- > > Key: HADOOP-18258 > URL: https://issues.apache.org/jira/browse/HADOOP-18258 > Project: Hadoop Common > 
Issue Type: Sub-task > Components: fs/s3 > Reporter: Sravani Gadey > Assignee: Sravani Gadey > Priority: Major > Labels: pull-request-available > Time Spent: 6h 10m > Remaining Estimate: 0h > > Merging audit log files containing a huge number of audit logs collected from a > Hive or Spark job, containing various S3 requests like list, head, > get and put requests. -- This message was sent by Atlassian Jira (v8.20.7#820007) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org