Re: [PR] S3A Analytics-Accelerator: Add IoStatistics support [hadoop]

via GitHub Tue, 01 Jul 2025 06:31:39 -0700


ahmarsuhail commented on code in PR #7763:
URL: https://github.com/apache/hadoop/pull/7763#discussion_r2177600916



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAnalyticsAcceleratorStreamReading.java:
##########
@@ -194,4 +215,127 @@ public void testInvalidConfigurationThrows() throws 
Exception {
         () -> 
S3SeekableInputStreamConfiguration.fromConfiguration(connectorConfiguration));
   }
 
+  @Test
+  public void testLargeFileMultipleGets() throws Throwable {
+    describe("Large file should trigger multiple GET requests");
+
+    Path dest = writeThenReadFile("large-test-file.txt", 10 * 1024 * 1024); // 
10MB
+
+
+    try (FSDataInputStream inputStream = getFileSystem().open(dest)) {
+      IOStatistics ioStats = inputStream.getIOStatistics();
+      inputStream.readFully(new byte[(int) 
getFileSystem().getFileStatus(dest).getLen()]);
+
+      verifyStatisticCounterValue(ioStats, STREAM_READ_ANALYTICS_GET_REQUESTS, 
2);
+    }
+  }
+
+  @Test
+  public void testSmallFileSingleGet() throws Throwable {
+    describe("Small file should trigger only one GET request");
+
+    Path dest = writeThenReadFile("small-test-file.txt", 1 * 1024 * 1024); // 
1KB
+
+    try (FSDataInputStream inputStream = getFileSystem().open(dest)) {
+      IOStatistics ioStats = inputStream.getIOStatistics();
+      inputStream.readFully(new byte[(int) 
getFileSystem().getFileStatus(dest).getLen()]);
+
+      verifyStatisticCounterValue(ioStats, STREAM_READ_ANALYTICS_GET_REQUESTS, 
1);
+    }
+  }
+
+
+  @Test
+  public void testRandomSeekPatternGets() throws Throwable {
+    describe("Random seek pattern should optimize GET requests");
+
+    Path dest = writeThenReadFile("seek-test.txt", 100 * 1024);
+
+    try (FSDataInputStream inputStream = getFileSystem().open(dest)) {
+      IOStatistics ioStats = inputStream.getIOStatistics();
+
+      inputStream.seek(1000);
+      inputStream.read(new byte[100]);
+
+      inputStream.seek(50000);
+      inputStream.read(new byte[100]);
+
+      inputStream.seek(90000);
+      inputStream.read(new byte[100]);
+
+      verifyStatisticCounterValue(ioStats, STREAM_READ_ANALYTICS_GET_REQUESTS, 
1);
+    }
+}
+
+  @Test
+  public void testAALNeverMakesHeadRequests() throws Throwable {
+    describe("Prove AAL never makes HEAD requests - S3A provides all 
metadata");
+
+    Path dest = writeThenReadFile("no-head-test.txt", 1024 * 1024); // 1MB
+
+    try (FSDataInputStream inputStream = getFileSystem().open(dest)) {
+      IOStatistics ioStats = inputStream.getIOStatistics();
+      inputStream.read(new byte[1024]);
+
+      verifyStatisticCounterValue(ioStats, 
STREAM_READ_ANALYTICS_HEAD_REQUESTS, 0);
+      verifyStatisticCounterValue(ioStats, STREAM_READ_ANALYTICS_OPENED, 1);
+
+      ObjectInputStream objectInputStream = (ObjectInputStream) 
inputStream.getWrappedStream();
+      
Assertions.assertThat(objectInputStream.streamType()).isEqualTo(InputStreamType.Analytics);
+
+    }
+  }
+
+
+  @Test
+  public void testParquetReadingNoHeadRequests() throws Throwable {
+    describe("Parquet-optimized reading should not trigger AAL HEAD requests");
+
+    Path dest = path("parquet-head-test.parquet");
+    File file = new File("src/test/resources/multi_row_group.parquet");
+    Path sourcePath = new Path(file.toURI().getPath());
+    getFileSystem().copyFromLocalFile(false, true, sourcePath, dest);
+
+    try (FSDataInputStream stream = getFileSystem().openFile(dest)
+            .must(FS_OPTION_OPENFILE_READ_POLICY, 
FS_OPTION_OPENFILE_READ_POLICY_PARQUET)
+            .build().get()) {
+
+      FileStatus fileStatus = getFileSystem().getFileStatus(dest);
+      stream.readFully(new byte[(int) fileStatus.getLen()]);
+
+      IOStatistics stats = stream.getIOStatistics();
+
+      verifyStatisticCounterValue(stats, STREAM_READ_ANALYTICS_HEAD_REQUESTS, 
0);
+      verifyStatisticCounterValue(stats, STREAM_READ_ANALYTICS_OPENED, 1);
+
+      verifyStatisticCounterValue(stats, STREAM_READ_ANALYTICS_GET_REQUESTS, 
1);
+    }
+  }
+
+
+  @Test
+  public void testConcurrentStreamsNoDuplicateGets() throws Throwable {
+    describe("Concurrent streams reading same object should not duplicate 
GETs");
+
+    Path dest = writeThenReadFile("concurrent-test.txt", 1 * 1024 * 1024);
+
+    try (FSDataInputStream stream1 = getFileSystem().open(dest);

Review Comment:
   This isn't concurrent, it's sequential: stream2.read will always happen 
after stream1.read, so the GET count from stream2.read will always be 0.
   
   You can update your test to assert that stream2 made 0 requests and 
stream1 made 1 request.
   
   Or you can make this truly concurrent by doing a threadPool.submit().



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

Re: [PR] S3A Analytics-Accelerator: Add IoStatistics support [hadoop]

Reply via email to