[ https://issues.apache.org/jira/browse/HADOOP-18258?focusedWorklogId=783260&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-783260 ]
ASF GitHub Bot logged work on HADOOP-18258: ------------------------------------------- Author: ASF GitHub Bot Created on: 21/Jun/22 08:42 Start Date: 21/Jun/22 08:42 Worklog Time Spent: 10m Work Description: sravanigadey commented on code in PR #4383: URL: https://github.com/apache/hadoop/pull/4383#discussion_r902335062 ########## hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditTool.java: ########## @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.audit; + +import java.io.Closeable; +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FilterFileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR; +import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SERVICE_UNAVAILABLE; +import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS; + +/**. + * AuditTool is a Command Line Interface to manage S3 Auditing. + * i.e, it is a functionality which directly takes s3 path of audit log files + * and merge all those into single audit log file + */ +public class AuditTool extends Configured implements Tool, Closeable { + + private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class); + + private final String entryPoint = "s3audit"; + + private PrintWriter out; + + // Exit codes + private static final int SUCCESS = EXIT_SUCCESS; + private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR; + + /** + * Error String when the wrong FS is used for binding: {@value}. 
+ **/ + @VisibleForTesting + public static final String WRONG_FILESYSTEM = "Wrong filesystem for "; + + private final String usage = entryPoint + " s3a://BUCKET\n"; + + private final File s3aLogsDirectory = new File("S3AAuditLogsDirectory"); + + public AuditTool() { + } + + /** + * tells us the usage of the AuditTool by commands. + * + * @return the string USAGE + */ + public String getUsage() { + return usage; + } + + /** + * this run method in AuditTool takes S3 bucket path. + * which contains audit log files from command line arguments + * and merge the audit log files present in that path into single file in local system + * + * @param args command specific arguments. + * @return SUCCESS i.e, '0', which is an exit code + * @throws Exception on any failure + */ + @Override + public int run(String[] args) throws Exception { + List<String> argv = new ArrayList<>(Arrays.asList(args)); + println("argv: %s", argv); + if (argv.isEmpty()) { + errorln(getUsage()); + throw invalidArgs("No bucket specified"); + } + //path of audit log files in s3 bucket + Path s3LogsPath = new Path(argv.get(0)); + + //setting the file system + URI fsURI = toUri(String.valueOf(s3LogsPath)); + S3AFileSystem s3AFileSystem = + bindFilesystem(FileSystem.newInstance(fsURI, getConf())); + RemoteIterator<LocatedFileStatus> listOfS3LogFiles = + s3AFileSystem.listFiles(s3LogsPath, true); + + //creating local audit log files directory and + //copying audit log files into local files from s3 bucket + //so that it will be easy for us to implement merging and parsing classes + if (!s3aLogsDirectory.exists()) { + boolean s3aLogsDirectoryCreation = s3aLogsDirectory.mkdir(); + } + File s3aLogsSubDir = new File(s3aLogsDirectory, s3LogsPath.getName()); + boolean s3aLogsSubDirCreation = false; + if (!s3aLogsSubDir.exists()) { + s3aLogsSubDirCreation = s3aLogsSubDir.mkdir(); + } + if (s3aLogsSubDirCreation) { + while (listOfS3LogFiles.hasNext()) { + Path s3LogFilePath = listOfS3LogFiles.next().getPath(); + 
File s3LogLocalFilePath = Review Comment: yes, they will be closed. Issue Time Tracking ------------------- Worklog Id: (was: 783260) Time Spent: 4h 40m (was: 4.5h) > Merging of S3A Audit Logs > ------------------------- > > Key: HADOOP-18258 > URL: https://issues.apache.org/jira/browse/HADOOP-18258 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs/s3 > Reporter: Sravani Gadey > Assignee: Sravani Gadey > Priority: Major > Labels: pull-request-available > Time Spent: 4h 40m > Remaining Estimate: 0h > > Merging audit log files containing a huge number of audit logs collected from a > job like Hive or Spark job containing various S3 requests like list, head, > get and put requests. -- This message was sent by Atlassian Jira (v8.20.7#820007) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-issues-help@hadoop.apache.org