Repository: hadoop Updated Branches: refs/heads/branch-2 b6d5546e2 -> 043a0c2e6 refs/heads/branch-2.8 d0dc5aaa2 -> b8216c10d refs/heads/trunk 97e244947 -> c58a59f70
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java new file mode 100644 index 0000000..d29cb2f --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +/** + * Statistic which are collected in S3A. 
+ * These statistics are available at a low level in {@link S3AStorageStatistics} + * and as metrics in {@link S3AInstrumentation} + */ +public enum Statistic { + + DIRECTORIES_CREATED("directories_created", + "Total number of directories created through the object store."), + DIRECTORIES_DELETED("directories_deleted", + "Total number of directories deleted through the object store."), + FILES_COPIED("files_copied", + "Total number of files copied within the object store."), + FILES_COPIED_BYTES("files_copied_bytes", + "Total number of bytes copied within the object store."), + FILES_CREATED("files_created", + "Total number of files created through the object store."), + FILES_DELETED("files_deleted", + "Total number of files deleted from the object store."), + IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"), + INVOCATION_COPY_FROM_LOCAL_FILE("invocations_copyfromlocalfile", + "Calls of copyFromLocalFile()"), + INVOCATION_EXISTS("invocations_exists", + "Calls of exists()"), + INVOCATION_GET_FILE_STATUS("invocations_getfilestatus", + "Calls of getFileStatus()"), + INVOCATION_GLOB_STATUS("invocations_globstatus", + "Calls of globStatus()"), + INVOCATION_IS_DIRECTORY("invocations_is_directory", + "Calls of isDirectory()"), + INVOCATION_IS_FILE("invocations_is_file", + "Calls of isFile()"), + INVOCATION_LIST_FILES("invocations_listfiles", + "Calls of listFiles()"), + INVOCATION_LIST_LOCATED_STATUS("invocations_listlocatedstatus", + "Calls of listLocatedStatus()"), + INVOCATION_LIST_STATUS("invocations_liststatus", + "Calls of listStatus()"), + INVOCATION_MKDIRS("invocations_mkdirs", + "Calls of mkdirs()"), + INVOCATION_RENAME("invocations_rename", + "Calls of rename()"), + OBJECT_COPY_REQUESTS("object_copy_requests", "Object copy requests"), + OBJECT_DELETE_REQUESTS("object_delete_requests", "Object delete requests"), + OBJECT_LIST_REQUESTS("object_list_requests", + "Number of object listings made"), + OBJECT_METADATA_REQUESTS("object_metadata_requests", 
+ "Number of requests for object metadata"), + OBJECT_MULTIPART_UPLOAD_ABORTED("object_multipart_aborted", + "Object multipart upload aborted"), + OBJECT_PUT_REQUESTS("object_put_requests", + "Object put/multipart upload count"), + OBJECT_PUT_BYTES("object_put_bytes", "number of bytes uploaded"), + STREAM_ABORTED("streamAborted", + "Count of times the TCP stream was aborted"), + STREAM_BACKWARD_SEEK_OPERATIONS("streamBackwardSeekOperations", + "Number of executed seek operations which went backwards in a stream"), + STREAM_CLOSED("streamClosed", "Count of times the TCP stream was closed"), + STREAM_CLOSE_OPERATIONS("streamCloseOperations", + "Total count of times an attempt to close a data stream was made"), + STREAM_FORWARD_SEEK_OPERATIONS("streamForwardSeekOperations", + "Number of executed seek operations which went forward in a stream"), + STREAM_OPENED("streamOpened", + "Total count of times an input stream to object store was opened"), + STREAM_READ_EXCEPTIONS("streamReadExceptions", + "Number of read exceptions caught and attempted to recover from"), + STREAM_READ_FULLY_OPERATIONS("streamReadFullyOperations", + "count of readFully() operations in streams"), + STREAM_READ_OPERATIONS("streamReadOperations", + "Count of read() operations in streams"), + STREAM_READ_OPERATIONS_INCOMPLETE("streamReadOperationsIncomplete", + "Count of incomplete read() operations in streams"), + STREAM_SEEK_BYTES_BACKWARDS("streamBytesBackwardsOnSeek", + "Count of bytes moved backwards during seek operations"), + STREAM_SEEK_BYTES_READ("streamBytesRead", + "Count of bytes read during seek() in stream operations"), + STREAM_SEEK_BYTES_SKIPPED("streamBytesSkippedOnSeek", + "Count of bytes skipped during forward seek operation"), + STREAM_SEEK_OPERATIONS("streamSeekOperations", + "Number of seek operations invoked on input streams"); + + Statistic(String symbol, String description) { + this.symbol = symbol; + this.description = description; + } + + private final String symbol; + 
private final String description; + + public String getSymbol() { + return symbol; + } + + /** + * Get a statistic from a symbol. + * @param symbol statistic to look up + * @return the value or null. + */ + public static Statistic fromSymbol(String symbol) { + if (symbol != null) { + for (Statistic opType : values()) { + if (opType.getSymbol().equals(symbol)) { + return opType; + } + } + } + return null; + } + + public String getDescription() { + return description; + } + + /** + * The string value is simply the symbol. + * This makes this operation very low cost. + * @return the symbol of this statistic. + */ + @Override + public String toString() { + return symbol; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 4f5a077..7a5e455 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -735,9 +735,19 @@ The exact number of operations to perform is configurable in the option Larger values generate more load, and are recommended when testing locally, or in batch runs. -Smaller values should result in faster test runs, especially when the object +Smaller values result in faster test runs, especially when the object store is a long way away. +Operations which work on directories have a separate option: this controls +the width and depth of tests creating recursive directories. Larger +values create exponentially more directories, with consequent performance +impact. + + <property> + <name>scale.test.directory.count</name> + <value>2</value> + </property> + DistCp tests targeting S3A support a configurable file size. 
The default is 10 MB, but the configuration value is expressed in KB so that it can be tuned smaller to achieve faster test runs. http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index a4f9b99..04010d6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -21,7 +21,9 @@ package org.apache.hadoop.fs.s3a; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; +import org.junit.Assert; import org.junit.internal.AssumptionViolatedException; +import org.slf4j.Logger; import java.io.IOException; import java.net.URI; @@ -190,4 +192,155 @@ public class S3ATestUtils { } } + /** + * Reset all metrics in a list. + * @param metrics metrics to reset + */ + public static void reset(S3ATestUtils.MetricDiff... metrics) { + for (S3ATestUtils.MetricDiff metric : metrics) { + metric.reset(); + } + } + + /** + * Print all metrics in a list. + * @param log log to print the metrics to. + * @param metrics metrics to process + */ + public static void print(Logger log, S3ATestUtils.MetricDiff... metrics) { + for (S3ATestUtils.MetricDiff metric : metrics) { + log.info(metric.toString()); + } + } + + /** + * Print all metrics in a list, then reset them. + * @param log log to print the metrics to. + * @param metrics metrics to process + */ + public static void printThenReset(Logger log, + S3ATestUtils.MetricDiff... metrics) { + print(log, metrics); + reset(metrics); + } + + /** + * Helper class to do diffs of metrics. 
+ */ + public static final class MetricDiff { + private final S3AFileSystem fs; + private final Statistic statistic; + private long startingValue; + + /** + * Constructor. + * Invokes {@link #reset()} so it is immediately capable of measuring the + * difference in metric values. + * + * @param fs the filesystem to monitor + * @param statistic the statistic to monitor. + */ + public MetricDiff(S3AFileSystem fs, Statistic statistic) { + this.fs = fs; + this.statistic = statistic; + reset(); + } + + /** + * Reset the starting value to the current value. + * Diffs will be against this new value. + */ + public void reset() { + startingValue = currentValue(); + } + + /** + * Get the current value of the metric. + * @return the latest value. + */ + public long currentValue() { + return fs.getInstrumentation().getCounterValue(statistic); + } + + /** + * Get the difference between the current value and + * {@link #startingValue}. + * @return the difference. + */ + public long diff() { + return currentValue() - startingValue; + } + + @Override + public String toString() { + long c = currentValue(); + final StringBuilder sb = new StringBuilder(statistic.getSymbol()); + sb.append(" starting=").append(startingValue); + sb.append(" current=").append(c); + sb.append(" diff=").append(c - startingValue); + return sb.toString(); + } + + /** + * Assert that the value of {@link #diff()} matches that expected. + * @param expected expected value. + */ + public void assertDiffEquals(long expected) { + Assert.assertEquals("Count of " + this, + expected, diff()); + } + + /** + * Assert that the value of {@link #diff()} matches that of another + * instance. + * @param that the other metric diff instance. + */ + public void assertDiffEquals(MetricDiff that) { + Assert.assertEquals(this.toString() + " != " + that, + this.diff(), that.diff()); + } + + /** + * Comparator for assertions. 
+ * @param that other metric diff + * @return true if this {@link #diff()} is {@code ==} the other's + */ + public boolean diffEquals(MetricDiff that) { + return this.diff() == that.diff(); + } + + /** + * Comparator for assertions. + * @param that other metric diff + * @return true if this {@link #diff()} is {@code <} the other's + */ + public boolean diffLessThan(MetricDiff that) { + return this.diff() < that.diff(); + } + + /** + * Comparator for assertions. + * @param that other metric diff + * @return true if this {@link #diff()} is {@code <=} the other's + */ + public boolean diffLessThanOrEquals(MetricDiff that) { + return this.diff() <= that.diff(); + } + + /** + * Get the statistic + * @return the statistic + */ + public Statistic getStatistic() { + return statistic; + } + + /** + * Get the starting value; that set in the last {@link #reset()}. + * @return the starting value for diffs. + */ + public long getStartingValue() { + return startingValue; + } + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java new file mode 100644 index 0000000..0a8dd2d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.AbstractFSContractTestBase; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.contract.s3a.S3AContract; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileNotFoundException; +import java.net.URI; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.*; +import static org.apache.hadoop.fs.s3a.Statistic.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.MetricDiff; +import static org.apache.hadoop.test.GenericTestUtils.getTestDir; + +/** + * Use metrics to assert about the cost of file status queries. + * {@link S3AFileSystem#getFileStatus(Path)}. 
+ */ +public class TestS3AFileOperationCost extends AbstractFSContractTestBase { + + private MetricDiff metadataRequests; + private MetricDiff listRequests; + + private static final Logger LOG = + LoggerFactory.getLogger(TestS3AFileOperationCost.class); + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); + } + + @Override + public S3AFileSystem getFileSystem() { + return (S3AFileSystem) super.getFileSystem(); + } + + @Override + public void setup() throws Exception { + super.setup(); + S3AFileSystem fs = getFileSystem(); + metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS); + listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS); + } + + @Test + public void testCostOfGetFileStatusOnFile() throws Throwable { + describe("performing getFileStatus on a file"); + Path simpleFile = path("simple.txt"); + S3AFileSystem fs = getFileSystem(); + touch(fs, simpleFile); + resetMetricDiffs(); + S3AFileStatus status = fs.getFileStatus(simpleFile); + assertTrue("not a file: " + status, status.isFile()); + metadataRequests.assertDiffEquals(1); + listRequests.assertDiffEquals(0); + } + + private void resetMetricDiffs() { + reset(metadataRequests, listRequests); + } + + @Test + public void testCostOfGetFileStatusOnEmptyDir() throws Throwable { + describe("performing getFileStatus on an empty directory"); + S3AFileSystem fs = getFileSystem(); + Path dir = path("empty"); + fs.mkdirs(dir); + resetMetricDiffs(); + S3AFileStatus status = fs.getFileStatus(dir); + assertTrue("not empty: " + status, status.isEmptyDirectory()); + metadataRequests.assertDiffEquals(2); + listRequests.assertDiffEquals(0); + } + + @Test + public void testCostOfGetFileStatusOnMissingFile() throws Throwable { + describe("performing getFileStatus on a missing file"); + S3AFileSystem fs = getFileSystem(); + Path path = path("missing"); + resetMetricDiffs(); + try { + S3AFileStatus status = fs.getFileStatus(path); + fail("Got a status back 
from a missing file path " + status); + } catch (FileNotFoundException expected) { + // expected + } + metadataRequests.assertDiffEquals(2); + listRequests.assertDiffEquals(1); + } + + @Test + public void testCostOfGetFileStatusOnMissingSubPath() throws Throwable { + describe("performing getFileStatus on a missing file"); + S3AFileSystem fs = getFileSystem(); + Path path = path("missingdir/missingpath"); + resetMetricDiffs(); + try { + S3AFileStatus status = fs.getFileStatus(path); + fail("Got a status back from a missing file path " + status); + } catch (FileNotFoundException expected) { + // expected + } + metadataRequests.assertDiffEquals(2); + listRequests.assertDiffEquals(1); + } + + @Test + public void testCostOfGetFileStatusOnNonEmptyDir() throws Throwable { + describe("performing getFileStatus on a non-empty directory"); + S3AFileSystem fs = getFileSystem(); + Path dir = path("empty"); + fs.mkdirs(dir); + Path simpleFile = new Path(dir, "simple.txt"); + touch(fs, simpleFile); + resetMetricDiffs(); + S3AFileStatus status = fs.getFileStatus(dir); + if (status.isEmptyDirectory()) { + // erroneous state + String fsState = fs.toString(); + fail("FileStatus says directory isempty: " + status + + "\n" + ContractTestUtils.ls(fs, dir) + + "\n" + fsState); + } + metadataRequests.assertDiffEquals(2); + listRequests.assertDiffEquals(1); + } + + @Test + public void testCostOfCopyFromLocalFile() throws Throwable { + describe("testCostOfCopyFromLocalFile"); + File localTestDir = getTestDir("tmp"); + localTestDir.mkdirs(); + File tmpFile = File.createTempFile("tests3acost", ".txt", + localTestDir); + tmpFile.delete(); + try { + URI localFileURI = tmpFile.toURI(); + FileSystem localFS = FileSystem.get(localFileURI, + getFileSystem().getConf()); + Path localPath = new Path(localFileURI); + int len = 10 * 1024; + byte[] data = dataset(len, 'A', 'Z'); + writeDataset(localFS, localPath, data, len, 1024, true); + S3AFileSystem s3a = getFileSystem(); + MetricDiff copyLocalOps = 
new MetricDiff(s3a, + INVOCATION_COPY_FROM_LOCAL_FILE); + MetricDiff putRequests = new MetricDiff(s3a, + OBJECT_PUT_REQUESTS); + MetricDiff putBytes = new MetricDiff(s3a, + OBJECT_PUT_BYTES); + + Path remotePath = path("copied"); + s3a.copyFromLocalFile(false, true, localPath, remotePath); + verifyFileContents(s3a, remotePath, data); + copyLocalOps.assertDiffEquals(1); + putRequests.assertDiffEquals(1); + putBytes.assertDiffEquals(len); + // print final stats + LOG.info("Filesystem {}", s3a); + } finally { + tmpFile.delete(); + } + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java index d65f693..21639b1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java @@ -34,13 +34,11 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.rules.TestName; +import org.junit.rules.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.InputStream; -import java.util.Locale; - -import static org.junit.Assume.assumeTrue; /** * Base class for scale tests; here is where the common scale configuration @@ -51,11 +49,57 @@ public class S3AScaleTestBase extends Assert implements S3ATestConstants { @Rule public TestName methodName = new TestName(); + @Rule + public Timeout testTimeout = new Timeout(30 * 60 * 1000); + @BeforeClass public static void nameThread() { Thread.currentThread().setName("JUnit"); } + /** + * The number of operations to perform: {@value}. 
+ */ + public static final String KEY_OPERATION_COUNT = + SCALE_TEST + "operation.count"; + + /** + * The number of directory operations to perform: {@value}. + */ + public static final String KEY_DIRECTORY_COUNT = + SCALE_TEST + "directory.count"; + + /** + * The readahead buffer: {@value}. + */ + public static final String KEY_READ_BUFFER_SIZE = + S3A_SCALE_TEST + "read.buffer.size"; + + public static final int DEFAULT_READ_BUFFER_SIZE = 16384; + + /** + * Key for a multi MB test file: {@value}. + */ + public static final String KEY_CSVTEST_FILE = + S3A_SCALE_TEST + "csvfile"; + + /** + * Default path for the multi MB test file: {@value}. + */ + public static final String DEFAULT_CSVTEST_FILE + = "s3a://landsat-pds/scene_list.gz"; + + /** + * The default number of operations to perform: {@value}. + */ + public static final long DEFAULT_OPERATION_COUNT = 2005; + + /** + * Default number of directories to create when performing + * directory performance/scale tests. + */ + public static final int DEFAULT_DIRECTORY_COUNT = 2; + protected S3AFileSystem fs; protected static final Logger LOG = @@ -132,108 +176,4 @@ public class S3AScaleTestBase extends Assert implements S3ATestConstants { } } - /** - * Make times more readable, by adding a "," every three digits. - * @param nanos nanos or other large number - * @return a string for logging - */ - protected static String toHuman(long nanos) { - return String.format(Locale.ENGLISH, "%,d", nanos); - } - - /** - * Log the bandwidth of a timer as inferred from the number of - * bytes processed. 
- * @param timer timer - * @param bytes bytes processed in the time period - */ - protected void bandwidth(NanoTimer timer, long bytes) { - LOG.info("Bandwidth = {} MB/S", - timer.bandwidthDescription(bytes)); - } - - /** - * Work out the bandwidth in MB/s - * @param bytes bytes - * @param durationNS duration in nanos - * @return the number of megabytes/second of the recorded operation - */ - public static double bandwidthMBs(long bytes, long durationNS) { - return (bytes * 1000.0 ) / durationNS; - } - - /** - * A simple class for timing operations in nanoseconds, and for - * printing some useful results in the process. - */ - protected static class NanoTimer { - final long startTime; - long endTime; - - public NanoTimer() { - startTime = now(); - } - - /** - * End the operation - * @return the duration of the operation - */ - public long end() { - endTime = now(); - return duration(); - } - - /** - * End the operation; log the duration - * @param format message - * @param args any arguments - * @return the duration of the operation - */ - public long end(String format, Object... args) { - long d = end(); - LOG.info("Duration of {}: {} nS", - String.format(format, args), toHuman(d)); - return d; - } - - long now() { - return System.nanoTime(); - } - - long duration() { - return endTime - startTime; - } - - double bandwidth(long bytes) { - return S3AScaleTestBase.bandwidthMBs(bytes, duration()); - } - - /** - * Bandwidth as bytes per second - * @param bytes bytes in - * @return the number of bytes per second this operation timed. 
- */ - double bandwidthBytes(long bytes) { - return (bytes * 1.0 ) / duration(); - } - - /** - * How many nanoseconds per byte - * @param bytes bytes processed in this time period - * @return the nanoseconds it took each byte to be processed - */ - long nanosPerByte(long bytes) { - return duration() / bytes; - } - - /** - * Get a description of the bandwidth, even down to fractions of - * a MB - * @param bytes bytes processed - * @return bandwidth - */ - String bandwidthDescription(long bytes) { - return String.format("%,.6f", bandwidth(bytes)); - } - } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java index af1883e..5e07dcb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java @@ -20,9 +20,7 @@ package org.apache.hadoop.fs.s3a.scale; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,15 +32,13 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import static org.junit.Assert.assertEquals; - +/** + * Test some scalable operations related to file renaming and deletion. 
+ */ public class TestS3ADeleteManyFiles extends S3AScaleTestBase { private static final Logger LOG = LoggerFactory.getLogger(TestS3ADeleteManyFiles.class); - @Rule - public Timeout testTimeout = new Timeout(30 * 60 * 1000); - /** * CAUTION: If this test starts failing, please make sure that the * {@link org.apache.hadoop.fs.s3a.Constants#MAX_THREADS} configuration is not http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java new file mode 100644 index 0000000..7ece394 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.scale; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Statistic; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +import static org.apache.hadoop.fs.s3a.Statistic.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; +import static org.apache.hadoop.fs.contract.ContractTestUtils.*; + +/** + * Test the performance of listing files/directories. + */ +public class TestS3ADirectoryPerformance extends S3AScaleTestBase { + private static final Logger LOG = LoggerFactory.getLogger( + TestS3ADirectoryPerformance.class); + + @Test + public void testListOperations() throws Throwable { + describe("Test recursive list operations"); + final Path scaleTestDir = getTestPath(); + final Path listDir = new Path(scaleTestDir, "lists"); + + // scale factor. + int scale = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT); + int width = scale; + int depth = scale; + int files = scale; + MetricDiff metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS); + MetricDiff listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS); + MetricDiff listStatusCalls = new MetricDiff(fs, INVOCATION_LIST_FILES); + MetricDiff getFileStatusCalls = + new MetricDiff(fs, INVOCATION_GET_FILE_STATUS); + NanoTimer createTimer = new NanoTimer(); + TreeScanResults created = + createSubdirs(fs, listDir, depth, width, files, 0); + // add some empty directories + int emptyDepth = 1 * scale; + int emptyWidth = 3 * scale; + + created.add(createSubdirs(fs, listDir, emptyDepth, emptyWidth, 0, + 0, "empty", "f-", "")); + createTimer.end("Time to create %s", created); + LOG.info("Time per operation: {}", + toHuman(createTimer.nanosPerOperation(created.totalCount()))); + printThenReset(LOG, + metadataRequests, + listRequests, + listStatusCalls, + getFileStatusCalls); + + try { + // Scan the directory via an explicit tree walk. 
+ // This is the baseline for any listing speedups. + MetricDiff treewalkMetadataRequests = + new MetricDiff(fs, OBJECT_METADATA_REQUESTS); + MetricDiff treewalkListRequests = new MetricDiff(fs, + OBJECT_LIST_REQUESTS); + MetricDiff treewalkListStatusCalls = new MetricDiff(fs, + INVOCATION_LIST_FILES); + MetricDiff treewalkGetFileStatusCalls = + new MetricDiff(fs, INVOCATION_GET_FILE_STATUS); + NanoTimer treeWalkTimer = new NanoTimer(); + TreeScanResults treewalkResults = treeWalk(fs, listDir); + treeWalkTimer.end("List status via treewalk"); + + print(LOG, + treewalkMetadataRequests, + treewalkListRequests, + treewalkListStatusCalls, + treewalkGetFileStatusCalls); + assertEquals("Files found in treewalk " + + " created=" + created + " listed=" + treewalkResults, + created.getFileCount(), treewalkResults.getFileCount()); + + + // listFiles() does the recursion internally + NanoTimer listFilesRecursiveTimer = new NanoTimer(); + + TreeScanResults listFilesResults = new TreeScanResults( + fs.listFiles(listDir, true)); + + listFilesRecursiveTimer.end("listFiles(recursive=true) of %s", created); + assertEquals("Files found in listFiles(recursive=true) " + + " created=" + created + " listed=" + listFilesResults, + created.getFileCount(), listFilesResults.getFileCount()); + + treewalkListRequests.assertDiffEquals(listRequests); + printThenReset(LOG, + metadataRequests, listRequests, + listStatusCalls, getFileStatusCalls); + + NanoTimer globStatusTimer = new NanoTimer(); + FileStatus[] globStatusFiles = fs.globStatus(listDir); + globStatusTimer.end("Time to globStatus() %s", listDir); + LOG.info("Time for glob status {} entries: {}", + globStatusFiles.length, + toHuman(globStatusTimer.duration())); + printThenReset(LOG, + metadataRequests, + listRequests, + listStatusCalls, + getFileStatusCalls); + + } finally { + // deletion at the end of the run + NanoTimer deleteTimer = new NanoTimer(); + fs.delete(listDir, true); + deleteTimer.end("Deleting 
directory tree"); + printThenReset(LOG, + metadataRequests, listRequests, + listStatusCalls, getFileStatusCalls); + } + } + + @Test + public void testTimeToStatEmptyDirectory() throws Throwable { + describe("Time to stat an empty directory"); + Path path = new Path(getTestPath(), "empty"); + fs.mkdirs(path); + timeToStatPath(path); + } + + @Test + public void testTimeToStatNonEmptyDirectory() throws Throwable { + describe("Time to stat a non-empty directory"); + Path path = new Path(getTestPath(), "dir"); + fs.mkdirs(path); + touch(fs, new Path(path, "file")); + timeToStatPath(path); + } + + @Test + public void testTimeToStatFile() throws Throwable { + describe("Time to stat a simple file"); + Path path = new Path(getTestPath(), "file"); + touch(fs, path); + timeToStatPath(path); + } + + @Test + public void testTimeToStatRoot() throws Throwable { + describe("Time to stat the root path"); + timeToStatPath(new Path("/")); + } + + private void timeToStatPath(Path path) throws IOException { + describe("Timing getFileStatus(\"%s\")", path); + MetricDiff metadataRequests = + new MetricDiff(fs, Statistic.OBJECT_METADATA_REQUESTS); + MetricDiff listRequests = + new MetricDiff(fs, Statistic.OBJECT_LIST_REQUESTS); + long attempts = getOperationCount(); + NanoTimer timer = new NanoTimer(); + for (long l = 0; l < attempts; l++) { + fs.getFileStatus(path); + } + timer.end("Time to execute %d getFileStatusCalls", attempts); + LOG.info("Time per call: {}", toHuman(timer.nanosPerOperation(attempts))); + LOG.info("metadata: {}", metadataRequests); + LOG.info("metadata per operation {}", metadataRequests.diff() / attempts); + LOG.info("listObjects: {}", listRequests); + LOG.info("listObjects: per operation {}", listRequests.diff() / attempts); + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java ---------------------------------------------------------------------- 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java index 0c8b273..5222a4e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java @@ -36,8 +36,10 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import static org.apache.hadoop.fs.contract.ContractTestUtils.*; + /** - * Look at the performance of S3a operations + * Look at the performance of S3a operations. */ public class TestS3AInputStreamPerformance extends S3AScaleTestBase { private static final Logger LOG = LoggerFactory.getLogger( @@ -151,7 +153,7 @@ public class TestS3AInputStreamPerformance extends S3AScaleTestBase { readTimer.end("Time to read %d bytes", len); bandwidth(readTimer, count); assertEquals("Not enough bytes were read)", len, count); - long nanosPerByte = readTimer.nanosPerByte(count); + long nanosPerByte = readTimer.nanosPerOperation(count); LOG.info("An open() call has the equivalent duration of reading {} bytes", toHuman( timeOpen.duration() / nanosPerByte)); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties index bc85425..1330ed1 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties +++ b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties @@ -15,7 +15,9 @@ log4j.rootLogger=info,stdout log4j.threshold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 
-log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n + +log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR # for debugging low level S3a operations, uncomment this line # log4j.logger.org.apache.hadoop.fs.s3a=DEBUG --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org