This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 908f41e85 ORC-2001: Add method descriptions to all public Java interfaces
908f41e85 is described below

commit 908f41e85b8d9485fec2414ea091b4604106c839
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Sep 18 13:47:40 2025 -0700

    ORC-2001: Add method descriptions to all public Java interfaces
    
    ### What changes were proposed in this pull request?
    
    This PR aims to add method descriptions to all public Java interfaces.
    
    ### Why are the changes needed?
    
    Currently, Apache ORC documents the methods of its public Java interfaces
    inconsistently. We should be consistent and enrich the explanation of those
    methods for developers and users.
    
    ### How was this patch tested?
    
    Manual reviews.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Yes, I asked `gemini-2.5-pro` to add method descriptions to all public Java interfaces.
    
    Closes #2407 from dongjoon-hyun/ORC-2001.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../apache/orc/bench/core/convert/BatchReader.java |  6 +++
 .../apache/orc/bench/core/convert/BatchWriter.java |  5 +++
 .../orc/bench/core/convert/avro/AvroReader.java    |  6 +++
 .../orc/bench/core/convert/avro/AvroWriter.java    |  6 +++
 .../org/apache/orc/BinaryColumnStatistics.java     |  4 ++
 .../org/apache/orc/BooleanColumnStatistics.java    |  8 ++++
 java/core/src/java/org/apache/orc/DataReader.java  |  6 +++
 .../org/apache/orc/GeospatialColumnStatistics.java |  9 +++++
 java/core/src/java/org/apache/orc/OrcFile.java     | 14 +++++++
 .../src/java/org/apache/orc/impl/Dictionary.java   | 23 +++++++++++
 .../apache/orc/impl/DirectDecompressionCodec.java  | 11 ++++++
 .../java/org/apache/orc/impl/IntegerWriter.java    |  4 ++
 .../java/org/apache/orc/impl/PositionProvider.java |  4 ++
 .../java/org/apache/orc/impl/PositionRecorder.java |  4 ++
 .../org/apache/orc/impl/TreeReaderFactory.java     | 45 +++++++++++++++++++++
 .../apache/orc/impl/reader/tree/TypeReader.java    | 46 ++++++++++++++++++++++
 .../org/apache/orc/impl/writer/WriterContext.java  | 12 ++++++
 java/core/src/test/org/apache/orc/TestConf.java    |  3 ++
 .../src/java/org/apache/orc/impl/HadoopShims.java  | 17 ++++++++
 .../src/java/org/apache/orc/impl/KeyProvider.java  |  8 ++++
 20 files changed, 241 insertions(+)

diff --git 
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java 
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java
index 9a127ffda..72d561948 100644
--- 
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java
+++ 
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchReader.java
@@ -27,6 +27,12 @@ import java.io.IOException;
  */
 public interface BatchReader extends AutoCloseable {
 
+  /**
+   * Read the next batch of rows.
+   * @param batch the batch to read into
+   * @return true if a batch was read
+   * @throws IOException if there is an error reading the batch
+   */
   boolean nextBatch(VectorizedRowBatch batch) throws IOException;
 
   @Override
diff --git 
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java 
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java
index 2d75ee1e6..9836690c8 100644
--- 
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java
+++ 
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/BatchWriter.java
@@ -28,6 +28,11 @@ import java.io.IOException;
  */
 public interface BatchWriter extends Closeable {
 
+  /**
+   * Write the next batch of rows.
+   * @param batch the batch to write from
+   * @throws IOException if there is an error writing the batch
+   */
   void writeBatch(VectorizedRowBatch batch) throws IOException;
 
   @Override
diff --git 
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java
 
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java
index 8474351f2..1db2b4b69 100644
--- 
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java
+++ 
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java
@@ -79,6 +79,12 @@ public class AvroReader implements BatchReader {
   }
 
   public interface AvroConverter {
+    /**
+     * Convert a value from Avro to ORC.
+     * @param vector the column vector to put the value into
+     * @param row the row to update
+     * @param value the value to convert
+     */
     void convert(ColumnVector vector, int row, Object value);
   }
 
diff --git 
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java
 
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java
index 34fa16667..8a7d80f4b 100644
--- 
a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java
+++ 
b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java
@@ -47,6 +47,12 @@ import java.util.List;
 public class AvroWriter implements BatchWriter {
 
   public interface AvroConverter {
+    /**
+     * Convert a value from ORC to Avro.
+     * @param vector the column vector to get the value from
+     * @param row the row to get
+     * @return the Avro value
+     */
     Object convert(ColumnVector vector, int row);
   }
 
diff --git a/java/core/src/java/org/apache/orc/BinaryColumnStatistics.java 
b/java/core/src/java/org/apache/orc/BinaryColumnStatistics.java
index b01c7da09..77453b9c2 100644
--- a/java/core/src/java/org/apache/orc/BinaryColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/BinaryColumnStatistics.java
@@ -21,5 +21,9 @@ package org.apache.orc;
  * Statistics for binary columns.
  */
 public interface BinaryColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the sum of the lengths of the binary values.
+   * @return the sum of the lengths
+   */
   long getSum();
 }
diff --git a/java/core/src/java/org/apache/orc/BooleanColumnStatistics.java 
b/java/core/src/java/org/apache/orc/BooleanColumnStatistics.java
index 41a7a5245..ec51db2ba 100644
--- a/java/core/src/java/org/apache/orc/BooleanColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/BooleanColumnStatistics.java
@@ -21,7 +21,15 @@ package org.apache.orc;
  * Statistics for boolean columns.
  */
 public interface BooleanColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the number of false values.
+   * @return the number of false values
+   */
   long getFalseCount();
 
+  /**
+   * Get the number of true values.
+   * @return the number of true values
+   */
   long getTrueCount();
 }
diff --git a/java/core/src/java/org/apache/orc/DataReader.java 
b/java/core/src/java/org/apache/orc/DataReader.java
index d6f61016b..a477866f5 100644
--- a/java/core/src/java/org/apache/orc/DataReader.java
+++ b/java/core/src/java/org/apache/orc/DataReader.java
@@ -30,6 +30,12 @@ public interface DataReader extends AutoCloseable, Cloneable {
   /** Opens the DataReader, making it ready to use. */
   void open() throws IOException;
 
+  /**
+   * Read the stripe footer.
+   * @param stripe the stripe to read
+   * @return the stripe footer
+   * @throws IOException if there is an error reading the stripe footer
+   */
   OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException;
 
   /**
diff --git a/java/core/src/java/org/apache/orc/GeospatialColumnStatistics.java 
b/java/core/src/java/org/apache/orc/GeospatialColumnStatistics.java
index db66084c1..301b62598 100644
--- a/java/core/src/java/org/apache/orc/GeospatialColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/GeospatialColumnStatistics.java
@@ -22,6 +22,15 @@ import org.apache.orc.geospatial.BoundingBox;
 import org.apache.orc.geospatial.GeospatialTypes;
 
 public interface GeospatialColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the bounding box for the column.
+   * @return the bounding box
+   */
   BoundingBox getBoundingBox();
+
+  /**
+   * Get the geospatial types for the column.
+   * @return the geospatial types
+   */
   GeospatialTypes getGeospatialTypes();
 }
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java 
b/java/core/src/java/org/apache/orc/OrcFile.java
index 2daba452c..8c61b0065 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -391,11 +391,25 @@ public class OrcFile {
   }
 
   public interface WriterContext {
+    /**
+     * Get the writer.
+     * @return the writer
+     */
     Writer getWriter();
   }
 
   public interface WriterCallback {
+    /**
+     * A callback before the stripe is written.
+     * @param context the writer context
+     * @throws IOException if there is an error
+     */
     void preStripeWrite(WriterContext context) throws IOException;
+    /**
+     * A callback before the footer is written.
+     * @param context the writer context
+     * @throws IOException if there is an error
+     */
     void preFooterWrite(WriterContext context) throws IOException;
   }
 
diff --git a/java/core/src/java/org/apache/orc/impl/Dictionary.java 
b/java/core/src/java/org/apache/orc/impl/Dictionary.java
index 430f343dd..18e990b8f 100644
--- a/java/core/src/java/org/apache/orc/impl/Dictionary.java
+++ b/java/core/src/java/org/apache/orc/impl/Dictionary.java
@@ -42,6 +42,9 @@ public interface Dictionary {
    */
   void visit(Visitor visitor) throws IOException;
 
+  /**
+   * Clear the dictionary.
+   */
   void clear();
 
   /**
@@ -53,6 +56,11 @@ public interface Dictionary {
    */
   void getText(Text result, int position);
 
+  /**
+   * Given the position index, return the original string before being encoded.
+   * @param position the position where the key was added
+   * @return the original string
+   */
   ByteBuffer getText(int position);
 
   /**
@@ -66,10 +74,25 @@ public interface Dictionary {
    */
   int writeTo(OutputStream out, int position) throws IOException;
 
+  /**
+   * Add a new key to the dictionary.
+   * @param bytes the bytes of the string to add
+   * @param offset the offset of the string
+   * @param length the length of the string
+   * @return the position of the key in the dictionary
+   */
   int add(byte[] bytes, int offset, int length);
 
+  /**
+   * Get the number of entries in the dictionary.
+   * @return the number of entries
+   */
   int size();
 
+  /**
+   * Get the size of the dictionary in bytes.
+   * @return the size in bytes
+   */
   long getSizeInBytes();
 
   /**
diff --git 
a/java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java 
b/java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
index ea1965304..a285cec9c 100644
--- a/java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
+++ b/java/core/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
@@ -23,6 +23,17 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 
 public interface DirectDecompressionCodec extends CompressionCodec {
+  /**
+   * Check if the codec is available.
+   * @return true if the codec is available
+   */
   boolean isAvailable();
+
+  /**
+   * Decompress the in buffer to the out buffer.
+   * @param in the bytes to decompress
+   * @param out the decompressed bytes
+   * @throws IOException if there is an error
+   */
   void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
 }
diff --git a/java/core/src/java/org/apache/orc/impl/IntegerWriter.java 
b/java/core/src/java/org/apache/orc/impl/IntegerWriter.java
index 9f3b804ff..e09e80c96 100644
--- a/java/core/src/java/org/apache/orc/impl/IntegerWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/IntegerWriter.java
@@ -52,5 +52,9 @@ public interface IntegerWriter {
    */
   long estimateMemory();
 
+  /**
+   * Change the initialization vector for the encryption.
+   * @param modifier the function to modify the IV
+   */
   void changeIv(Consumer<byte[]> modifier);
 }
diff --git a/java/core/src/java/org/apache/orc/impl/PositionProvider.java 
b/java/core/src/java/org/apache/orc/impl/PositionProvider.java
index 2cbcb7300..f13f9a4e1 100644
--- a/java/core/src/java/org/apache/orc/impl/PositionProvider.java
+++ b/java/core/src/java/org/apache/orc/impl/PositionProvider.java
@@ -22,5 +22,9 @@ package org.apache.orc.impl;
  * An interface used for seeking to a row index.
  */
 public interface PositionProvider {
+  /**
+   * Get the next position.
+   * @return the next position
+   */
   long getNext();
 }
diff --git a/java/core/src/java/org/apache/orc/impl/PositionRecorder.java 
b/java/core/src/java/org/apache/orc/impl/PositionRecorder.java
index 287aabba9..ed9864044 100644
--- a/java/core/src/java/org/apache/orc/impl/PositionRecorder.java
+++ b/java/core/src/java/org/apache/orc/impl/PositionRecorder.java
@@ -21,5 +21,9 @@ package org.apache.orc.impl;
  * An interface for recording positions in a stream.
  */
 public interface PositionRecorder {
+  /**
+   * Add a position to the stream.
+   * @param offset the offset to add
+   */
   void addPosition(long offset);
 }
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java 
b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 785f568ff..e9122a1c6 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -66,26 +66,71 @@ import java.util.function.Consumer;
  */
 public class TreeReaderFactory {
   public interface Context {
+    /**
+     * Get the schema evolution for the reader.
+     * @return the schema evolution
+     */
     SchemaEvolution getSchemaEvolution();
 
+    /**
+     * Get the set of column ids that are filtered.
+     * @return the set of column ids
+     */
     Set<Integer> getColumnFilterIds();
 
+    /**
+     * Get the callback for column filtering.
+     * @return the callback
+     */
     Consumer<OrcFilterContext> getColumnFilterCallback();
 
+    /**
+     * Check if corrupt records should be skipped.
+     * @return true if corrupt records should be skipped
+     */
     boolean isSkipCorrupt();
 
+    /**
+     * Check if UTC timestamp should be used.
+     * @return true if UTC timestamp should be used
+     */
     boolean getUseUTCTimestamp();
 
+    /**
+     * Get the timezone of the writer.
+     * @return the timezone of the writer
+     */
     String getWriterTimezone();
 
+    /**
+     * Get the file format version.
+     * @return the file format version
+     */
     OrcFile.Version getFileFormat();
 
+    /**
+     * Get the encryption information.
+     * @return the encryption information
+     */
     ReaderEncryption getEncryption();
 
+    /**
+     * Check if proleptic Gregorian calendar should be used.
+     * @return true if proleptic Gregorian calendar should be used
+     */
     boolean useProlepticGregorian();
 
+    /**
+     * Check if the file was written with proleptic Gregorian calendar.
+     * @return true if the file was written with proleptic Gregorian calendar
+     */
     boolean fileUsedProlepticGregorian();
 
+    /**
+     * Get the reader category for the given column id.
+     * @param columnId the column id
+     * @return the reader category
+     */
     TypeReader.ReaderCategory getReaderCategory(int columnId);
   }
 
diff --git a/java/core/src/java/org/apache/orc/impl/reader/tree/TypeReader.java 
b/java/core/src/java/org/apache/orc/impl/reader/tree/TypeReader.java
index 5f7eadbb1..3ffb43736 100644
--- a/java/core/src/java/org/apache/orc/impl/reader/tree/TypeReader.java
+++ b/java/core/src/java/org/apache/orc/impl/reader/tree/TypeReader.java
@@ -28,24 +28,70 @@ import java.io.IOException;
 import java.util.EnumSet;
 
 public interface TypeReader {
+  /**
+   * Check the encoding of the column.
+   * @param encoding the encoding to check
+   * @throws IOException if the encoding is not supported
+   */
   void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException;
 
+  /**
+   * Start reading a stripe.
+   * @param planner the stripe planner
+   * @param readPhase the read phase
+   * @throws IOException if there is an error
+   */
   void startStripe(StripePlanner planner, ReadPhase readPhase) throws IOException;
 
+  /**
+   * Seek to a position in the stream.
+   * @param index the array of position providers
+   * @param readPhase the read phase
+   * @throws IOException if there is an error
+   */
   void seek(PositionProvider[] index, ReadPhase readPhase) throws IOException;
 
+  /**
+   * Seek to a position in the stream.
+   * @param index the position provider
+   * @param readPhase the read phase
+   * @throws IOException if there is an error
+   */
   void seek(PositionProvider index, ReadPhase readPhase) throws IOException;
 
+  /**
+   * Skip a number of rows.
+   * @param rows the number of rows to skip
+   * @param readPhase the read phase
+   * @throws IOException if there is an error
+   */
   void skipRows(long rows, ReadPhase readPhase) throws IOException;
 
+  /**
+   * Read the next vector of values.
+   * @param previous the previous vector
+   * @param isNull the isNull vector
+   * @param batchSize the batch size
+   * @param filterContext the filter context
+   * @param readPhase the read phase
+   * @throws IOException if there is an error
+   */
   void nextVector(ColumnVector previous,
                   boolean[] isNull,
                   int batchSize,
                   FilterContext filterContext,
                   ReadPhase readPhase) throws IOException;
 
+  /**
+   * Get the column id.
+   * @return the column id
+   */
   int getColumnId();
 
+  /**
+   * Get the reader category.
+   * @return the reader category
+   */
   ReaderCategory getReaderCategory();
 
   /**
diff --git a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java 
b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
index 03c31b660..c9570fef7 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
@@ -86,9 +86,21 @@ public interface WriterContext {
   @Deprecated
   OrcFile.BloomFilterVersion getBloomFilterVersion();
 
+  /**
+   * Write the row index.
+   * @param name the name of the stream
+   * @param index the index to write
+   * @throws IOException if there is an error
+   */
   void writeIndex(StreamName name,
                   OrcProto.RowIndex.Builder index) throws IOException;
 
+  /**
+   * Write the bloom filter.
+   * @param name the name of the stream
+   * @param bloom the bloom filter to write
+   * @throws IOException if there is an error
+   */
   void writeBloomFilter(StreamName name,
                         OrcProto.BloomFilterIndex.Builder bloom
                         ) throws IOException;
diff --git a/java/core/src/test/org/apache/orc/TestConf.java 
b/java/core/src/test/org/apache/orc/TestConf.java
index aedbeb8d4..2b5e9dca6 100644
--- a/java/core/src/test/org/apache/orc/TestConf.java
+++ b/java/core/src/test/org/apache/orc/TestConf.java
@@ -27,6 +27,9 @@ public interface TestConf {
 
   Configuration conf = getNewConf();
 
+  /**
+   * Clear the configuration.
+   */
   @BeforeEach
   default void clear() {
     conf.clear();
diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShims.java 
b/java/shims/src/java/org/apache/orc/impl/HadoopShims.java
index f79f35364..910533855 100644
--- a/java/shims/src/java/org/apache/orc/impl/HadoopShims.java
+++ b/java/shims/src/java/org/apache/orc/impl/HadoopShims.java
@@ -38,8 +38,20 @@ public interface HadoopShims {
   }
 
   interface DirectDecompressor {
+    /**
+     * Decompress the in buffer to the out buffer.
+     * @param var1 the bytes to decompress
+     * @param var2 the decompressed bytes
+     * @throws IOException if there is an error
+     */
     void decompress(ByteBuffer var1, ByteBuffer var2) throws IOException;
+    /**
+     * Reset the decompressor.
+     */
     void reset();
+    /**
+     * End the decompressor.
+     */
     void end();
   }
 
@@ -131,6 +143,11 @@ public interface HadoopShims {
    */
   boolean endVariableLengthBlock(OutputStream output) throws IOException;
 
+  /**
+   * Check if the Hadoop version supports vectored IO.
+   * @param version the Hadoop version string
+   * @return true if the Hadoop version supports vectored IO
+   */
   default boolean supportVectoredIO(String version) {
     // HADOOP-18103 is available since Apache Hadoop 3.3.5+
     String[] versionParts = version.split("[.-]");
diff --git a/java/shims/src/java/org/apache/orc/impl/KeyProvider.java 
b/java/shims/src/java/org/apache/orc/impl/KeyProvider.java
index 23840206f..5f9f1af37 100644
--- a/java/shims/src/java/org/apache/orc/impl/KeyProvider.java
+++ b/java/shims/src/java/org/apache/orc/impl/KeyProvider.java
@@ -77,6 +77,14 @@ public interface KeyProvider {
    * A service loader factory interface.
    */
   interface Factory {
+    /**
+     * Create a key provider.
+     * @param kind the kind of key provider
+     * @param conf the configuration
+     * @param random a random number generator
+     * @return the key provider
+     * @throws IOException if there is an error
+     */
     KeyProvider create(String kind,
                        Configuration conf,
                        Random random) throws IOException;

Reply via email to