This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push: new badcca2ebe8 [HUDI-7742] Move Hadoop-dependent reader util classes to hudi-hadoop-common module (#11190) badcca2ebe8 is described below commit badcca2ebe8c30efa3fc13cad4c3f0114101874a Author: Y Ethan Guo <ethan.guoyi...@gmail.com> AuthorDate: Fri May 10 14:20:00 2024 -0700 [HUDI-7742] Move Hadoop-dependent reader util classes to hudi-hadoop-common module (#11190) --- .../action/bootstrap/OrcBootstrapMetadataHandler.java | 2 +- .../common/table/log/block/HoodieHFileDataBlock.java | 5 +++-- .../hudi/common/testutils/HoodieTestDataGenerator.java | 4 ---- .../java/org/apache/hudi/common/util/AvroOrcUtils.java | 0 .../main/java/org/apache/hudi/common/util/OrcUtils.java | 1 + .../org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java | 1 - .../org/apache/hudi/io/hadoop}/OrcReaderIterator.java | 17 ++++++++++------- .../apache/hudi/io/storage/HoodieHBaseKVComparator.java | 0 .../parquet/avro/HoodieAvroParquetReaderBuilder.java | 0 .../org/apache/parquet/avro/HoodieAvroReadSupport.java | 0 .../org/apache/hudi/common/util/TestAvroOrcUtils.java | 4 ++++ .../apache/hudi/io/hadoop}/TestOrcReaderIterator.java | 17 ++++++++++------- .../org/apache/hudi/functional/TestOrcBootstrap.java | 2 +- .../deltastreamer/HoodieDeltaStreamerTestBase.java | 3 ++- .../hudi/utilities/testutils/UtilitiesTestBase.java | 3 ++- 15 files changed, 34 insertions(+), 25 deletions(-) diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java index 2d4457d575b..86944ae3f5b 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java @@ -25,11 +25,11 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.util.AvroOrcUtils; -import org.apache.hudi.common.util.OrcReaderIterator; import org.apache.hudi.common.util.queue.HoodieExecutor; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.io.HoodieBootstrapHandle; +import org.apache.hudi.io.hadoop.OrcReaderIterator; import org.apache.hudi.keygen.KeyGeneratorInterface; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.table.HoodieTable; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index a379e305d0e..0893637b956 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.CloseableMappingIterator; import org.apache.hudi.exception.HoodieIOException; @@ -33,7 +34,6 @@ import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; -import org.apache.hudi.io.storage.HoodieHBaseKVComparator; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; import org.apache.hudi.storage.StorageConfiguration; @@ -76,6 +76,7 @@ import static org.apache.hudi.common.util.ValidationUtils.checkState; public class HoodieHFileDataBlock extends HoodieDataBlock { private static final Logger LOG = LoggerFactory.getLogger(HoodieHFileDataBlock.class); private static final int DEFAULT_BLOCK_SIZE = 1024 * 1024; + private static final String KV_COMPARATOR_CLASS_NAME = "org.apache.hudi.io.storage.HoodieHBaseKVComparator"; private final Option<Compression.Algorithm> compressionAlgorithm; // This path is used for constructing HFile reader context, which should not be @@ -121,7 +122,7 @@ public class HoodieHFileDataBlock extends HoodieDataBlock { HFileContext context = new HFileContextBuilder() .withBlockSize(DEFAULT_BLOCK_SIZE) .withCompression(compressionAlgorithm.get()) - .withCellComparator(new HoodieHBaseKVComparator()) + .withCellComparator(ReflectionUtils.loadClass(KV_COMPARATOR_CLASS_NAME)) .build(); Configuration conf = new Configuration(); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java index 4139f1fa396..31f6b1c562d 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java @@ -32,7 +32,6 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; -import org.apache.hudi.common.util.AvroOrcUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; @@ -49,7 +48,6 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.fs.Path; -import org.apache.orc.TypeDescription; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -162,12 +160,10 @@ public class HoodieTestDataGenerator implements AutoCloseable { public static final Schema AVRO_SCHEMA = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA); public static final Schema NESTED_AVRO_SCHEMA = new Schema.Parser().parse(TRIP_NESTED_EXAMPLE_SCHEMA); - public static final TypeDescription ORC_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA)); public static final Schema AVRO_SCHEMA_WITH_METADATA_FIELDS = HoodieAvroUtils.addMetadataFields(AVRO_SCHEMA); public static final Schema AVRO_SHORT_TRIP_SCHEMA = new Schema.Parser().parse(SHORT_TRIP_SCHEMA); public static final Schema AVRO_TRIP_SCHEMA = new Schema.Parser().parse(TRIP_SCHEMA); - public static final TypeDescription ORC_TRIP_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA)); public static final Schema FLATTENED_AVRO_SCHEMA = new Schema.Parser().parse(TRIP_FLATTENED_SCHEMA); private final Random rand; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java index e34f8c4f195..6bbae77d4b9 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java @@ -29,6 +29,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.MetadataNotFoundException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.io.hadoop.OrcReaderIterator; import org.apache.hudi.keygen.BaseKeyGenerator; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.HoodieStorageUtils; diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java index c1f5b79c227..917b8a1a627 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java @@ -23,7 +23,6 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.util.AvroOrcUtils; import org.apache.hudi.common.util.BaseFileUtils; -import org.apache.hudi.common.util.OrcReaderIterator; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java similarity index 87% rename from hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java index 6b6e46e7a8d..3ef5c911760 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java @@ -7,17 +7,20 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.util; +package org.apache.hudi.io.hadoop; +import org.apache.hudi.common.util.AvroOrcUtils; +import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieIOException; diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java diff --git a/hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java similarity index 100% rename from hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java rename to hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java diff --git a/hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java similarity index 100% rename from hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java rename to hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java index 6c157349974..de7968b3ce0 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java @@ -30,12 +30,16 @@ import java.util.Arrays; import java.util.List; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.AVRO_SCHEMA; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_SCHEMA; import static org.junit.jupiter.api.Assertions.assertEquals; /** * Tests {@link AvroOrcUtils}. */ public class TestAvroOrcUtils extends HoodieCommonTestHarness { + public static final TypeDescription ORC_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA)); + public static final TypeDescription ORC_TRIP_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA)); public static List<Arguments> testCreateOrcSchemaArgs() { // the ORC schema is constructed in the order as AVRO_SCHEMA: diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java similarity index 88% rename from hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java rename to hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java index b439d816724..4cf6f7c27c7 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java @@ -7,16 +7,19 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.util; +package org.apache.hudi.io.hadoop; + +import org.apache.hudi.common.util.AvroOrcUtils; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java index 9b0199a869c..2db842c13a8 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java @@ -43,7 +43,6 @@ import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.RawTripTestPayload; import org.apache.hudi.common.util.AvroOrcUtils; import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.OrcReaderIterator; import org.apache.hudi.common.util.PartitionPathEncodeUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieBootstrapConfig; @@ -52,6 +51,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.HoodieParquetInputFormat; import org.apache.hudi.index.HoodieIndex.IndexType; +import org.apache.hudi.io.hadoop.OrcReaderIterator; import org.apache.hudi.keygen.NonpartitionedKeyGenerator; import org.apache.hudi.keygen.SimpleKeyGenerator; import org.apache.hudi.table.action.bootstrap.BootstrapUtils; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java index e0093f3c92a..6aebde9a443 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java @@ -30,6 +30,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.TestAvroOrcUtils; import org.apache.hudi.config.HoodieCleanConfig; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.hive.HiveSyncConfigHolder; @@ -439,7 +440,7 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase { if (useCustomSchema) { Helpers.saveORCToDFS(Helpers.toGenericRecords( dataGenerator.generateInsertsAsPerSchema("000", numRecords, schemaStr), - schema), new Path(path), HoodieTestDataGenerator.ORC_TRIP_SCHEMA); + schema), new Path(path), TestAvroOrcUtils.ORC_TRIP_SCHEMA); } else { Helpers.saveORCToDFS(Helpers.toGenericRecords( dataGenerator.generateInserts("000", numRecords)), new Path(path)); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java index 90104ab7ab2..afb9a9ad97c 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java @@ -31,6 +31,7 @@ import org.apache.hudi.common.testutils.minicluster.HdfsTestService; import org.apache.hudi.common.testutils.minicluster.ZookeeperTestService; import org.apache.hudi.common.util.AvroOrcUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.TestAvroOrcUtils; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.ddl.JDBCExecutor; @@ -429,7 +430,7 @@ public class UtilitiesTestBase { } public static void saveORCToDFS(List<GenericRecord> records, Path targetFile) throws IOException { - saveORCToDFS(records, targetFile, HoodieTestDataGenerator.ORC_SCHEMA); + saveORCToDFS(records, targetFile, TestAvroOrcUtils.ORC_SCHEMA); } public static void saveORCToDFS(List<GenericRecord> records, Path targetFile, TypeDescription schema) throws IOException {