opwvhk commented on code in PR #2967: URL: https://github.com/apache/avro/pull/2967#discussion_r1651134309
##########
lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java:
##########
@@ -221,6 +228,37 @@ private void testSyncDiscovery(CodecFactory codec) throws IOException {
         reader.seek(sync);
         assertNotNull(reader.next());
       }
+      // Lastly, confirm that reading (but not decoding) all blocks results in the
+      // same sync points
+      reader.sync(0);
+      ArrayList<Long> syncs2 = new ArrayList<>();
+      while (reader.hasNext()) {
+        syncs2.add(reader.previousSync());
+        reader.nextBlock();
+      }
+      assertEquals(syncs, syncs2);
+    }
+  }
+
+  private void testReadLastRecord(CodecFactory codec) throws IOException {
+    File file = makeFile(codec);
+    try (DataFileReader<Object> reader = new DataFileReader<>(file, new GenericDatumReader<>())) {
+      long lastBlockStart = -1;
+      while (reader.hasNext()) {
+        // This algorithm can be made more efficient by checking if the underlying
+        // SeekableFileInput has been fully read: if so, the last block is in
+        // memory, and calls to next() will decode it.
+        // NOTE: this depends on the current implementation of DataFileReader.

Review Comment:
Using a `SeekableFileInput` (checking if `tell()` and `length()` yield the same result) works even without fixing this bug, but depends on the internal implementation. The algorithm in the current test is cleaner in that it only uses the public API.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@avro.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org