This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 908f41e85 ORC-2001: Add method descriptions to all public Java
interfaces
908f41e85 is described below
commit 908f41e85b8d9485fec2414ea091b4604106c839
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Sep 18 13:47:40 2025 -0700
ORC-2001: Add method descriptions to all public Java interfaces
### What changes were proposed in this pull request?
This PR aims to add method descriptions to all public Java interfaces.
### Why are the changes needed?
Currently, Apache ORC documents its methods inconsistently. We should be
consistent and enrich the explanations of those methods for developers and
users.
### How was this patch tested?
Manual reviews.
### Was this patch authored or co-authored using generative AI tooling?
Yes, I asked `gemini-2.5-pro` to add method descriptions to all public Java
interfaces.
Closes #2407 from dongjoon-hyun/ORC-2001.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../apache/orc/bench/core/convert/BatchReader.java | 6 +++
.../apache/orc/bench/core/convert/BatchWriter.java | 5 +++
.../orc/bench/core/convert/avro/AvroReader.java | 6 +++
.../orc/bench/core/convert/avro/AvroWriter.java | 6 +++
.../org/apache/orc/BinaryColumnStatistics.java | 4 ++
.../org/apache/orc/BooleanColumnStatistics.java | 8 ++++
java/core/src/java/org/apache/orc/DataReader.java | 6 +++
.../org/apache/orc/GeospatialColumnStatistics.java | 9 +++++
java/core/src/java/org/apache/orc/OrcFile.java | 14 +++++++
.../src/java/org/apache/orc/impl/Dictionary.java | 23 +++++++++++
.../apache/orc/impl/DirectDecompressionCodec.java | 11 ++++++
.../java/org/apache/orc/impl/IntegerWriter.java | 4 ++
.../java/org/apache/orc/impl/PositionProvider.java | 4 ++
.../java/org/apache/orc/impl/PositionRecorder.java | 4 ++
.../org/apache/orc/impl/TreeReaderFactory.java | 45 +++++++++++++++++++++
.../apache/orc/impl/reader/tree/TypeReader.java | 46 ++++++++++++++++++++++
.../org/apache/orc/impl/writer/WriterContext.java | 12 ++++++
java/core/src/test/org/apache/orc/TestConf.java | 3 ++
.../src/java/org/apache/orc/impl/HadoopShims.java | 17 ++++++++
.../src/java/org/apache/orc/impl/KeyProvider.java | 8 ++++
20 files changed, 241 insertions(+)
diff --git
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java
index 9a127ffda..72d561948 100644
---
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java
+++
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java
@@ -27,6 +27,12 @@ import java.io.IOException;
*/
public interface BatchReader extends AutoCloseable {
+ /**
+ * Read the next batch of rows.
+ * @param batch the batch to read into
+ * @return true if a batch was read
+ * @throws IOException if there is an error reading the batch
+ */
boolean nextBatch(VectorizedRowBatch batch) throws IOException;
@Override
diff --git
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java
index 2d75ee1e6..9836690c8 100644
---
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java
+++
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java
@@ -28,6 +28,11 @@ import java.io.IOException;
*/
public interface BatchWriter extends Closeable {
+ /**
+ * Write the next batch of rows.
+ * @param batch the batch to write from
+ * @throws IOException if there is an error writing the batch
+ */
void writeBatch(VectorizedRowBatch batch) throws IOException;
@Override
diff --git
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java
index 8474351f2..1db2b4b69 100644
---
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java
+++
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java
@@ -79,6 +79,12 @@ public class AvroReader implements BatchReader {
}
public interface AvroConverter {
+ /**
+ * Convert a value from Avro to ORC.
+ * @param vector the column vector to put the value into
+ * @param row the row to update
+ * @param value the value to convert
+ */
void convert(ColumnVector vector, int row, Object value);
}
diff --git
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java
index 34fa16667..8a7d80f4b 100644
---
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java
+++
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java
@@ -47,6 +47,12 @@ import java.util.List;
public class AvroWriter implements BatchWriter {
public interface AvroConverter {
+ /**
+ * Convert a value from ORC to Avro.
+ * @param vector the column vector to get the value from
+ * @param row the row to get
+ * @return the Avro value
+ */
Object convert(ColumnVector vector, int row);
}
diff --git a/java/core/src/java/org/apache/orc/BinaryColumnStatistics.java
b/java/core/src/java/org/apache/orc/BinaryColumnStatistics.java
index b01c7da09..77453b9c2 100644
--- a/java/core/src/java/org/apache/orc/BinaryColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/BinaryColumnStatistics.java
@@ -21,5 +21,9 @@ package org.apache.orc;
* Statistics for binary columns.
*/
public interface BinaryColumnStatistics extends ColumnStatistics {
+ /**
+ * Get the sum of the lengths of the binary values.
+ * @return the sum of the lengths
+ */
long getSum();
}
diff --git a/java/core/src/java/org/apache/orc/BooleanColumnStatistics.java
b/java/core/src/java/org/apache/orc/BooleanColumnStatistics.java
index 41a7a5245..ec51db2ba 100644
--- a/java/core/src/java/org/apache/orc/BooleanColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/BooleanColumnStatistics.java
@@ -21,7 +21,15 @@ package org.apache.orc;
* Statistics for boolean columns.
*/
public interface BooleanColumnStatistics extends ColumnStatistics {
+ /**
+ * Get the number of false values.
+ * @return the number of false values
+ */
long getFalseCount();
+ /**
+ * Get the number of true values.
+ * @return the number of true values
+ */
long getTrueCount();
}
diff --git a/java/core/src/java/org/apache/orc/DataReader.java
b/java/core/src/java/org/apache/orc/DataReader.java
index d6f61016b..a477866f5 100644
--- a/java/core/src/java/org/apache/orc/DataReader.java
+++ b/java/core/src/java/org/apache/orc/DataReader.java
@@ -30,6 +30,12 @@ public interface DataReader extends AutoCloseable, Cloneable
{
/** Opens the DataReader, making it ready to use. */
void open() throws IOException;
+ /**
+ * Read the stripe footer.
+ * @param stripe the stripe to read
+ * @return the stripe footer
+ * @throws IOException if there is an error reading the stripe footer
+ */
OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws
IOException;
/**
diff --git a/java/core/src/java/org/apache/orc/GeospatialColumnStatistics.java
b/java/core/src/java/org/apache/orc/GeospatialColumnStatistics.java
index db66084c1..301b62598 100644
--- a/java/core/src/java/org/apache/orc/GeospatialColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/GeospatialColumnStatistics.java
@@ -22,6 +22,15 @@ import org.apache.orc.geospatial.BoundingBox;
import org.apache.orc.geospatial.GeospatialTypes;
public interface GeospatialColumnStatistics extends ColumnStatistics {
+ /**
+ * Get the bounding box for the column.
+ * @return the bounding box
+ */
BoundingBox getBoundingBox();
+
+ /**
+ * Get the geospatial types for the column.
+ * @return the geospatial types
+ */
GeospatialTypes getGeospatialTypes();
}
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java
b/java/core/src/java/org/apache/orc/OrcFile.java
index 2daba452c..8c61b0065 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -391,11 +391,25 @@ public class OrcFile {
}
public interface WriterContext {
+ /**
+ * Get the writer.
+ * @return the writer
+ */
Writer getWriter();
}
public interface WriterCallback {
+ /**
+ * A callback before the stripe is written.
+ * @param context the writer context
+ * @throws IOException if there is an error
+ */
void preStripeWrite(WriterContext context) throws IOException;
+ /**
+ * A callback before the footer is written.
+ * @param context the writer context
+ * @throws IOException if there is an error
+ */
void preFooterWrite(WriterContext context) throws IOException;
}
diff --git a/java/core/src/java/org/apache/orc/impl/Dictionary.java
b/java/core/src/java/org/apache/orc/impl/Dictionary.java
index 430f343dd..18e990b8f 100644
--- a/java/core/src/java/org/apache/orc/impl/Dictionary.java
+++ b/java/core/src/java/org/apache/orc/impl/Dictionary.java
@@ -42,6 +42,9 @@ public interface Dictionary {
*/
void visit(Visitor visitor) throws IOException;
+ /**
+ * Clear the dictionary.
+ */
void clear();
/**
@@ -53,6 +56,11 @@ public interface Dictionary {
*/
void getText(Text result, int position);
+ /**
+ * Given the position index, return the original string before being encoded.
+ * @param position the position where the key was added
+ * @return the original string as a ByteBuffer
+ */
ByteBuffer getText(int position);
/**
@@ -66,10 +74,25 @@ public interface Dictionary {
*/
int writeTo(OutputStream out, int position) throws IOException;
+ /**
+ * Add a new key to the dictionary.
+ * @param bytes the bytes of the string to add
+ * @param offset the offset of the string
+ * @param length the length of the string
+ * @return the position of the key in the dictionary
+ */
int add(byte[] bytes, int offset, int length);
+ /**
+ * Get the number of entries in the dictionary.
+ * @return the number of entries
+ */
int size();
+ /**
+ * Get the size of the dictionary in bytes.
+ * @return the size in bytes
+ */
long getSizeInBytes();
/**
diff --git
a/java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
b/java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
index ea1965304..a285cec9c 100644
--- a/java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
+++ b/java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
@@ -23,6 +23,17 @@ import java.io.IOException;
import java.nio.ByteBuffer;
public interface DirectDecompressionCodec extends CompressionCodec {
+ /**
+ * Check if the codec is available.
+ * @return true if the codec is available
+ */
boolean isAvailable();
+
+ /**
+ * Decompress the in buffer to the out buffer.
+ * @param in the bytes to decompress
+ * @param out the buffer that receives the decompressed bytes
+ * @throws IOException if there is an error
+ */
void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
}
diff --git a/java/core/src/java/org/apache/orc/impl/IntegerWriter.java
b/java/core/src/java/org/apache/orc/impl/IntegerWriter.java
index 9f3b804ff..e09e80c96 100644
--- a/java/core/src/java/org/apache/orc/impl/IntegerWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/IntegerWriter.java
@@ -52,5 +52,9 @@ public interface IntegerWriter {
*/
long estimateMemory();
+ /**
+ * Change the initialization vector for the encryption.
+ * @param modifier the function to modify the IV
+ */
void changeIv(Consumer<byte[]> modifier);
}
diff --git a/java/core/src/java/org/apache/orc/impl/PositionProvider.java
b/java/core/src/java/org/apache/orc/impl/PositionProvider.java
index 2cbcb7300..f13f9a4e1 100644
--- a/java/core/src/java/org/apache/orc/impl/PositionProvider.java
+++ b/java/core/src/java/org/apache/orc/impl/PositionProvider.java
@@ -22,5 +22,9 @@ package org.apache.orc.impl;
* An interface used for seeking to a row index.
*/
public interface PositionProvider {
+ /**
+ * Get the next position.
+ * @return the next position
+ */
long getNext();
}
diff --git a/java/core/src/java/org/apache/orc/impl/PositionRecorder.java
b/java/core/src/java/org/apache/orc/impl/PositionRecorder.java
index 287aabba9..ed9864044 100644
--- a/java/core/src/java/org/apache/orc/impl/PositionRecorder.java
+++ b/java/core/src/java/org/apache/orc/impl/PositionRecorder.java
@@ -21,5 +21,9 @@ package org.apache.orc.impl;
* An interface for recording positions in a stream.
*/
public interface PositionRecorder {
+ /**
+ * Record a position in the stream.
+ * @param offset the offset to add
+ */
void addPosition(long offset);
}
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 785f568ff..e9122a1c6 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -66,26 +66,71 @@ import java.util.function.Consumer;
*/
public class TreeReaderFactory {
public interface Context {
+ /**
+ * Get the schema evolution for the reader.
+ * @return the schema evolution
+ */
SchemaEvolution getSchemaEvolution();
+ /**
+ * Get the set of column ids that are filtered.
+ * @return the set of column ids
+ */
Set<Integer> getColumnFilterIds();
+ /**
+ * Get the callback for column filtering.
+ * @return the callback
+ */
Consumer<OrcFilterContext> getColumnFilterCallback();
+ /**
+ * Check if corrupt records should be skipped.
+ * @return true if corrupt records should be skipped
+ */
boolean isSkipCorrupt();
+ /**
+ * Check if UTC timestamp should be used.
+ * @return true if UTC timestamp should be used
+ */
boolean getUseUTCTimestamp();
+ /**
+ * Get the timezone of the writer.
+ * @return the timezone of the writer
+ */
String getWriterTimezone();
+ /**
+ * Get the file format version.
+ * @return the file format version
+ */
OrcFile.Version getFileFormat();
+ /**
+ * Get the encryption information.
+ * @return the encryption information
+ */
ReaderEncryption getEncryption();
+ /**
+ * Check if the proleptic Gregorian calendar should be used.
+ * @return true if the proleptic Gregorian calendar should be used
+ */
boolean useProlepticGregorian();
+ /**
+ * Check if the file was written with the proleptic Gregorian calendar.
+ * @return true if the file was written with the proleptic Gregorian calendar
+ */
boolean fileUsedProlepticGregorian();
+ /**
+ * Get the reader category for the given column id.
+ * @param columnId the column id
+ * @return the reader category
+ */
TypeReader.ReaderCategory getReaderCategory(int columnId);
}
diff --git a/java/core/src/java/org/apache/orc/impl/reader/tree/TypeReader.java
b/java/core/src/java/org/apache/orc/impl/reader/tree/TypeReader.java
index 5f7eadbb1..3ffb43736 100644
--- a/java/core/src/java/org/apache/orc/impl/reader/tree/TypeReader.java
+++ b/java/core/src/java/org/apache/orc/impl/reader/tree/TypeReader.java
@@ -28,24 +28,70 @@ import java.io.IOException;
import java.util.EnumSet;
public interface TypeReader {
+ /**
+ * Check the encoding of the column.
+ * @param encoding the encoding to check
+ * @throws IOException if the encoding is not supported
+ */
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException;
+ /**
+ * Start reading a stripe.
+ * @param planner the stripe planner
+ * @param readPhase the read phase
+ * @throws IOException if there is an error
+ */
void startStripe(StripePlanner planner, ReadPhase readPhase) throws
IOException;
+ /**
+ * Seek to the positions given by an array of position providers.
+ * @param index the position providers
+ * @param readPhase the read phase
+ * @throws IOException if there is an error
+ */
void seek(PositionProvider[] index, ReadPhase readPhase) throws IOException;
+ /**
+ * Seek to a position in the stream.
+ * @param index the position provider
+ * @param readPhase the read phase
+ * @throws IOException if there is an error
+ */
void seek(PositionProvider index, ReadPhase readPhase) throws IOException;
+ /**
+ * Skip a number of rows.
+ * @param rows the number of rows to skip
+ * @param readPhase the read phase
+ * @throws IOException if there is an error
+ */
void skipRows(long rows, ReadPhase readPhase) throws IOException;
+ /**
+ * Read the next vector of values.
+ * @param previous the previous vector
+ * @param isNull the isNull vector
+ * @param batchSize the batch size
+ * @param filterContext the filter context
+ * @param readPhase the read phase
+ * @throws IOException if there is an error
+ */
void nextVector(ColumnVector previous,
boolean[] isNull,
int batchSize,
FilterContext filterContext,
ReadPhase readPhase) throws IOException;
+ /**
+ * Get the column id.
+ * @return the column id
+ */
int getColumnId();
+ /**
+ * Get the reader category.
+ * @return the reader category
+ */
ReaderCategory getReaderCategory();
/**
diff --git a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
index 03c31b660..c9570fef7 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
@@ -86,9 +86,21 @@ public interface WriterContext {
@Deprecated
OrcFile.BloomFilterVersion getBloomFilterVersion();
+ /**
+ * Write the row index.
+ * @param name the name of the stream
+ * @param index the index to write
+ * @throws IOException if there is an error
+ */
void writeIndex(StreamName name,
OrcProto.RowIndex.Builder index) throws IOException;
+ /**
+ * Write the bloom filter.
+ * @param name the name of the stream
+ * @param bloom the bloom filter to write
+ * @throws IOException if there is an error
+ */
void writeBloomFilter(StreamName name,
OrcProto.BloomFilterIndex.Builder bloom
) throws IOException;
diff --git a/java/core/src/test/org/apache/orc/TestConf.java
b/java/core/src/test/org/apache/orc/TestConf.java
index aedbeb8d4..2b5e9dca6 100644
--- a/java/core/src/test/org/apache/orc/TestConf.java
+++ b/java/core/src/test/org/apache/orc/TestConf.java
@@ -27,6 +27,9 @@ public interface TestConf {
Configuration conf = getNewConf();
+ /**
+ * Clear the configuration.
+ */
@BeforeEach
default void clear() {
conf.clear();
diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShims.java
b/java/shims/src/java/org/apache/orc/impl/HadoopShims.java
index f79f35364..910533855 100644
--- a/java/shims/src/java/org/apache/orc/impl/HadoopShims.java
+++ b/java/shims/src/java/org/apache/orc/impl/HadoopShims.java
@@ -38,8 +38,20 @@ public interface HadoopShims {
}
interface DirectDecompressor {
+ /**
+ * Decompress the in buffer to the out buffer.
+ * @param var1 the bytes to decompress
+ * @param var2 the buffer that receives the decompressed bytes
+ * @throws IOException if there is an error
+ */
void decompress(ByteBuffer var1, ByteBuffer var2) throws IOException;
+ /**
+ * Reset the decompressor.
+ */
void reset();
+ /**
+ * End the decompressor.
+ */
void end();
}
@@ -131,6 +143,11 @@ public interface HadoopShims {
*/
boolean endVariableLengthBlock(OutputStream output) throws IOException;
+ /**
+ * Check if the Hadoop version supports vectored IO.
+ * @param version the Hadoop version string
+ * @return true if the Hadoop version supports vectored IO
+ */
default boolean supportVectoredIO(String version) {
// HADOOP-18103 is available since Apache Hadoop 3.3.5+
String[] versionParts = version.split("[.-]");
diff --git a/java/shims/src/java/org/apache/orc/impl/KeyProvider.java
b/java/shims/src/java/org/apache/orc/impl/KeyProvider.java
index 23840206f..5f9f1af37 100644
--- a/java/shims/src/java/org/apache/orc/impl/KeyProvider.java
+++ b/java/shims/src/java/org/apache/orc/impl/KeyProvider.java
@@ -77,6 +77,14 @@ public interface KeyProvider {
* A service loader factory interface.
*/
interface Factory {
+ /**
+ * Create a key provider.
+ * @param kind the kind of key provider
+ * @param conf the configuration
+ * @param random a random number generator
+ * @return the key provider
+ * @throws IOException if there is an error
+ */
KeyProvider create(String kind,
Configuration conf,
Random random) throws IOException;