steveloughran commented on code in PR #4273: URL: https://github.com/apache/hadoop/pull/4273#discussion_r873874607
########## hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java: ########## @@ -1095,6 +1102,54 @@ public static void validateFileContent(byte[] concat, byte[][] bytes) { mismatch); } + /** + * Utility to validate vectored read results. + * @param fileRanges input ranges. + * @param originalData original data. + * @throws IOException any ioe. + */ + public static void validateVectoredReadResult(List<FileRange> fileRanges, + byte[] originalData) + throws IOException, TimeoutException { + CompletableFuture<?>[] completableFutures = new CompletableFuture<?>[fileRanges.size()]; + int i = 0; + for (FileRange res : fileRanges) { + completableFutures[i++] = res.getData(); + } + CompletableFuture<Void> combinedFuture = CompletableFuture.allOf(completableFutures); + FutureIO.awaitFuture(combinedFuture, 5, TimeUnit.MINUTES); + + for (FileRange res : fileRanges) { + CompletableFuture<ByteBuffer> data = res.getData(); + ByteBuffer buffer = FutureIO.awaitFuture(data, 5, TimeUnit.MINUTES); + assertDatasetEquals((int) res.getOffset(), "vecRead", + buffer, res.getLength(), originalData); + } + } + + + /** + * Assert that the data read matches the dataset at the given offset. + * This helps verify that the seek process is moving the read pointer + * to the correct location in the file. + * @param readOffset the offset in the file where the read began. + * @param operation operation name for the assertion. + * @param data data read in. + * @param length length of data to check. + * @param originalData original data. 
+ */ + public static void assertDatasetEquals( + final int readOffset, final String operation, Review Comment: nit, split line ########## hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java: ########## @@ -446,6 +455,35 @@ public void test_040_PositionedReadHugeFile() throws Throwable { toHuman(timer.nanosPerOperation(ops))); } + @Test + public void test_045_vectoredIOHugeFile() throws Throwable { + assumeHugeFileExists(); + List<FileRange> rangeList = new ArrayList<>(); + rangeList.add(new FileRangeImpl(5856368, 1167716)); + rangeList.add(new FileRangeImpl(3520861, 1167700)); + rangeList.add(new FileRangeImpl(8191913, 1167775)); + rangeList.add(new FileRangeImpl(1520861, 1167700)); + rangeList.add(new FileRangeImpl(2520861, 116770)); + rangeList.add(new FileRangeImpl(9191913, 116770)); + rangeList.add(new FileRangeImpl(2820861, 156770)); + IntFunction<ByteBuffer> allocate = new IntFunction<ByteBuffer>() { Review Comment: can't you just add a lambda expression here? Indeed, passing in `ByteBuffer::allocate` should work ########## hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java: ########## @@ -1095,6 +1102,54 @@ public static void validateFileContent(byte[] concat, byte[][] bytes) { mismatch); } + /** + * Utility to validate vectored read results. + * @param fileRanges input ranges. + * @param originalData original data. + * @throws IOException any ioe. 
+ */ + public static void validateVectoredReadResult(List<FileRange> fileRanges, + byte[] originalData) + throws IOException, TimeoutException { + CompletableFuture<?>[] completableFutures = new CompletableFuture<?>[fileRanges.size()]; + int i = 0; + for (FileRange res : fileRanges) { + completableFutures[i++] = res.getData(); + } + CompletableFuture<Void> combinedFuture = CompletableFuture.allOf(completableFutures); + FutureIO.awaitFuture(combinedFuture, 5, TimeUnit.MINUTES); + + for (FileRange res : fileRanges) { + CompletableFuture<ByteBuffer> data = res.getData(); + ByteBuffer buffer = FutureIO.awaitFuture(data, 5, TimeUnit.MINUTES); Review Comment: we shouldn't have to wait at all, if the allOf completed. ########## hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java: ########## @@ -1095,6 +1102,54 @@ public static void validateFileContent(byte[] concat, byte[][] bytes) { mismatch); } + /** + * Utility to validate vectored read results. + * @param fileRanges input ranges. + * @param originalData original data. + * @throws IOException any ioe. + */ + public static void validateVectoredReadResult(List<FileRange> fileRanges, + byte[] originalData) + throws IOException, TimeoutException { + CompletableFuture<?>[] completableFutures = new CompletableFuture<?>[fileRanges.size()]; + int i = 0; + for (FileRange res : fileRanges) { + completableFutures[i++] = res.getData(); + } + CompletableFuture<Void> combinedFuture = CompletableFuture.allOf(completableFutures); + FutureIO.awaitFuture(combinedFuture, 5, TimeUnit.MINUTES); Review Comment: the timeout should be a constant, ideally a unit in seconds in case we ever want to make it much smaller -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-issues-help@hadoop.apache.org