This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new badcca2ebe8 [HUDI-7742] Move Hadoop-dependent reader util classes to hudi-hadoop-common module (#11190)
badcca2ebe8 is described below

commit badcca2ebe8c30efa3fc13cad4c3f0114101874a
Author: Y Ethan Guo <ethan.guoyi...@gmail.com>
AuthorDate: Fri May 10 14:20:00 2024 -0700

    [HUDI-7742] Move Hadoop-dependent reader util classes to hudi-hadoop-common module (#11190)
---
 .../action/bootstrap/OrcBootstrapMetadataHandler.java   |  2 +-
 .../common/table/log/block/HoodieHFileDataBlock.java    |  5 +++--
 .../hudi/common/testutils/HoodieTestDataGenerator.java  |  4 ----
 .../java/org/apache/hudi/common/util/AvroOrcUtils.java  |  0
 .../main/java/org/apache/hudi/common/util/OrcUtils.java |  1 +
 .../org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java  |  1 -
 .../org/apache/hudi/io/hadoop}/OrcReaderIterator.java   | 17 ++++++++++-------
 .../apache/hudi/io/storage/HoodieHBaseKVComparator.java |  0
 .../parquet/avro/HoodieAvroParquetReaderBuilder.java    |  0
 .../org/apache/parquet/avro/HoodieAvroReadSupport.java  |  0
 .../org/apache/hudi/common/util/TestAvroOrcUtils.java   |  4 ++++
 .../apache/hudi/io/hadoop}/TestOrcReaderIterator.java   | 17 ++++++++++-------
 .../org/apache/hudi/functional/TestOrcBootstrap.java    |  2 +-
 .../deltastreamer/HoodieDeltaStreamerTestBase.java      |  3 ++-
 .../hudi/utilities/testutils/UtilitiesTestBase.java     |  3 ++-
 15 files changed, 34 insertions(+), 25 deletions(-)

diff --git 
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java
 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java
index 2d4457d575b..86944ae3f5b 100644
--- 
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java
+++ 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java
@@ -25,11 +25,11 @@ import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
 import org.apache.hudi.common.util.AvroOrcUtils;
-import org.apache.hudi.common.util.OrcReaderIterator;
 import org.apache.hudi.common.util.queue.HoodieExecutor;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.io.HoodieBootstrapHandle;
+import org.apache.hudi.io.hadoop.OrcReaderIterator;
 import org.apache.hudi.keygen.KeyGeneratorInterface;
 import org.apache.hudi.storage.StoragePath;
 import org.apache.hudi.table.HoodieTable;
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
 
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
index a379e305d0e..0893637b956 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
@@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieFileFormat;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ReflectionUtils;
 import org.apache.hudi.common.util.collection.ClosableIterator;
 import org.apache.hudi.common.util.collection.CloseableMappingIterator;
 import org.apache.hudi.exception.HoodieIOException;
@@ -33,7 +34,6 @@ import org.apache.hudi.io.SeekableDataInputStream;
 import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase;
 import org.apache.hudi.io.storage.HoodieFileReader;
 import org.apache.hudi.io.storage.HoodieFileReaderFactory;
-import org.apache.hudi.io.storage.HoodieHBaseKVComparator;
 import org.apache.hudi.storage.HoodieStorage;
 import org.apache.hudi.storage.HoodieStorageUtils;
 import org.apache.hudi.storage.StorageConfiguration;
@@ -76,6 +76,7 @@ import static 
org.apache.hudi.common.util.ValidationUtils.checkState;
 public class HoodieHFileDataBlock extends HoodieDataBlock {
   private static final Logger LOG = 
LoggerFactory.getLogger(HoodieHFileDataBlock.class);
   private static final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
+  private static final String KV_COMPARATOR_CLASS_NAME = 
"org.apache.hudi.io.storage.HoodieHBaseKVComparator";
 
   private final Option<Compression.Algorithm> compressionAlgorithm;
   // This path is used for constructing HFile reader context, which should not 
be
@@ -121,7 +122,7 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
     HFileContext context = new HFileContextBuilder()
         .withBlockSize(DEFAULT_BLOCK_SIZE)
         .withCompression(compressionAlgorithm.get())
-        .withCellComparator(new HoodieHBaseKVComparator())
+        
.withCellComparator(ReflectionUtils.loadClass(KV_COMPARATOR_CLASS_NAME))
         .build();
 
     Configuration conf = new Configuration();
diff --git 
a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
 
b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
index 4139f1fa396..31f6b1c562d 100644
--- 
a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
+++ 
b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
@@ -32,7 +32,6 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
-import org.apache.hudi.common.util.AvroOrcUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieIOException;
@@ -49,7 +48,6 @@ import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericFixed;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.hadoop.fs.Path;
-import org.apache.orc.TypeDescription;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -162,12 +160,10 @@ public class HoodieTestDataGenerator implements 
AutoCloseable {
 
   public static final Schema AVRO_SCHEMA = new 
Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA);
   public static final Schema NESTED_AVRO_SCHEMA = new 
Schema.Parser().parse(TRIP_NESTED_EXAMPLE_SCHEMA);
-  public static final TypeDescription ORC_SCHEMA = 
AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA));
   public static final Schema AVRO_SCHEMA_WITH_METADATA_FIELDS =
       HoodieAvroUtils.addMetadataFields(AVRO_SCHEMA);
   public static final Schema AVRO_SHORT_TRIP_SCHEMA = new 
Schema.Parser().parse(SHORT_TRIP_SCHEMA);
   public static final Schema AVRO_TRIP_SCHEMA = new 
Schema.Parser().parse(TRIP_SCHEMA);
-  public static final TypeDescription ORC_TRIP_SCHEMA = 
AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA));
   public static final Schema FLATTENED_AVRO_SCHEMA = new 
Schema.Parser().parse(TRIP_FLATTENED_SCHEMA);
 
   private final Random rand;
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
similarity index 100%
rename from 
hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
rename to 
hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
diff --git 
a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
index e34f8c4f195..6bbae77d4b9 100644
--- a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
+++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
@@ -29,6 +29,7 @@ import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.exception.MetadataNotFoundException;
 import org.apache.hudi.hadoop.fs.HadoopFSUtils;
+import org.apache.hudi.io.hadoop.OrcReaderIterator;
 import org.apache.hudi.keygen.BaseKeyGenerator;
 import org.apache.hudi.storage.HoodieStorage;
 import org.apache.hudi.storage.HoodieStorageUtils;
diff --git 
a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java
 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java
index c1f5b79c227..917b8a1a627 100644
--- 
a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java
+++ 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.bloom.BloomFilter;
 import org.apache.hudi.common.model.HoodieFileFormat;
 import org.apache.hudi.common.util.AvroOrcUtils;
 import org.apache.hudi.common.util.BaseFileUtils;
-import org.apache.hudi.common.util.OrcReaderIterator;
 import org.apache.hudi.common.util.collection.ClosableIterator;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.exception.HoodieIOException;
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java
similarity index 87%
rename from 
hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
rename to 
hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java
index 6b6e46e7a8d..3ef5c911760 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
+++ 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java
@@ -7,17 +7,20 @@
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
-package org.apache.hudi.common.util;
+package org.apache.hudi.io.hadoop;
 
+import org.apache.hudi.common.util.AvroOrcUtils;
+import org.apache.hudi.common.util.FileIOUtils;
 import org.apache.hudi.common.util.collection.ClosableIterator;
 import org.apache.hudi.exception.HoodieIOException;
 
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
similarity index 100%
rename from 
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
rename to 
hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
diff --git 
a/hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java
 
b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java
similarity index 100%
rename from 
hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java
rename to 
hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java
diff --git 
a/hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java 
b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
similarity index 100%
rename from 
hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
rename to 
hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
diff --git 
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java
 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java
index 6c157349974..de7968b3ce0 100644
--- 
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java
+++ 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java
@@ -30,12 +30,16 @@ import java.util.Arrays;
 import java.util.List;
 
 import static 
org.apache.hudi.common.testutils.HoodieTestDataGenerator.AVRO_SCHEMA;
+import static 
org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
+import static 
org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_SCHEMA;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 /**
  * Tests {@link AvroOrcUtils}.
  */
 public class TestAvroOrcUtils extends HoodieCommonTestHarness {
+  public static final TypeDescription ORC_SCHEMA = 
AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA));
+  public static final TypeDescription ORC_TRIP_SCHEMA = 
AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA));
 
   public static List<Arguments> testCreateOrcSchemaArgs() {
     // the ORC schema is constructed in the order as AVRO_SCHEMA:
diff --git 
a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java
similarity index 88%
rename from 
hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
rename to 
hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java
index b439d816724..4cf6f7c27c7 100644
--- 
a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
+++ 
b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java
@@ -7,16 +7,19 @@
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
-package org.apache.hudi.common.util;
+package org.apache.hudi.io.hadoop;
+
+import org.apache.hudi.common.util.AvroOrcUtils;
 
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericRecord;
diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
 
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
index 9b0199a869c..2db842c13a8 100644
--- 
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
+++ 
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
@@ -43,7 +43,6 @@ import org.apache.hudi.common.testutils.HoodieTestUtils;
 import org.apache.hudi.common.testutils.RawTripTestPayload;
 import org.apache.hudi.common.util.AvroOrcUtils;
 import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.OrcReaderIterator;
 import org.apache.hudi.common.util.PartitionPathEncodeUtils;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.config.HoodieBootstrapConfig;
@@ -52,6 +51,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.hadoop.HoodieParquetInputFormat;
 import org.apache.hudi.index.HoodieIndex.IndexType;
+import org.apache.hudi.io.hadoop.OrcReaderIterator;
 import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
 import org.apache.hudi.keygen.SimpleKeyGenerator;
 import org.apache.hudi.table.action.bootstrap.BootstrapUtils;
diff --git 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java
 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java
index e0093f3c92a..6aebde9a443 100644
--- 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java
+++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java
@@ -30,6 +30,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
 import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
 import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.common.util.TestAvroOrcUtils;
 import org.apache.hudi.config.HoodieCleanConfig;
 import org.apache.hudi.config.HoodieClusteringConfig;
 import org.apache.hudi.hive.HiveSyncConfigHolder;
@@ -439,7 +440,7 @@ public class HoodieDeltaStreamerTestBase extends 
UtilitiesTestBase {
     if (useCustomSchema) {
       Helpers.saveORCToDFS(Helpers.toGenericRecords(
           dataGenerator.generateInsertsAsPerSchema("000", numRecords, 
schemaStr),
-          schema), new Path(path), HoodieTestDataGenerator.ORC_TRIP_SCHEMA);
+          schema), new Path(path), TestAvroOrcUtils.ORC_TRIP_SCHEMA);
     } else {
       Helpers.saveORCToDFS(Helpers.toGenericRecords(
           dataGenerator.generateInserts("000", numRecords)), new Path(path));
diff --git 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
index 90104ab7ab2..afb9a9ad97c 100644
--- 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
+++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
@@ -31,6 +31,7 @@ import 
org.apache.hudi.common.testutils.minicluster.HdfsTestService;
 import org.apache.hudi.common.testutils.minicluster.ZookeeperTestService;
 import org.apache.hudi.common.util.AvroOrcUtils;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.TestAvroOrcUtils;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.hive.HiveSyncConfig;
 import org.apache.hudi.hive.ddl.JDBCExecutor;
@@ -429,7 +430,7 @@ public class UtilitiesTestBase {
     }
 
     public static void saveORCToDFS(List<GenericRecord> records, Path 
targetFile) throws IOException {
-      saveORCToDFS(records, targetFile, HoodieTestDataGenerator.ORC_SCHEMA);
+      saveORCToDFS(records, targetFile, TestAvroOrcUtils.ORC_SCHEMA);
     }
 
     public static void saveORCToDFS(List<GenericRecord> records, Path 
targetFile, TypeDescription schema) throws IOException {

Reply via email to