This is an automated email from the ASF dual-hosted git repository. pwason pushed a commit to branch release-0.14.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit da699fea98d4bbd5496c8ad7af70990ff592f3cf Author: Nicholas Jiang <programg...@163.com> AuthorDate: Wed Aug 16 03:13:15 2023 +0800 [HUDI-6553][FOLLOW-UP] Introduces Tuple3 for HoodieTableMetadataUtil (#9449) --- hudi-common/pom.xml | 7 --- .../apache/hudi/common/util/collection/Tuple3.java | 71 ++++++++++++++++++++++ .../hudi/metadata/HoodieTableMetadataUtil.java | 22 +++---- .../hudi/source/stats/ColumnStatsIndices.java | 17 +----- 4 files changed, 83 insertions(+), 34 deletions(-) diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 71f7cf85ab9..2b4eb2829b8 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -103,13 +103,6 @@ </build> <dependencies> - <!-- Scala --> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-library</artifactId> - <version>${scala.version}</version> - </dependency> - <dependency> <groupId>org.openjdk.jol</groupId> <artifactId>jol-core</artifactId> diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/Tuple3.java b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/Tuple3.java new file mode 100644 index 00000000000..40469398897 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/Tuple3.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.util.collection; + +import java.io.Serializable; + +/** + * A tuple with 3 fields. Tuples are strongly typed; each field may be of a separate type. The + * fields of the tuple can be accessed directly as public fields (f0, f1, ...). The tuple field + * positions start at zero. + * + * @param <T0> The type of field 0 + * @param <T1> The type of field 1 + * @param <T2> The type of field 2 + */ +public class Tuple3<T0, T1, T2> implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * Field 0 of the tuple. + */ + public final T0 f0; + /** + * Field 1 of the tuple. + */ + public final T1 f1; + /** + * Field 2 of the tuple. + */ + public final T2 f2; + + /** + * Creates a new tuple and assigns the given values to the tuple's fields. + * + * @param f0 The value for field 0 + * @param f1 The value for field 1 + * @param f2 The value for field 2 + */ + private Tuple3(T0 f0, T1 f1, T2 f2) { + this.f0 = f0; + this.f1 = f1; + this.f2 = f2; + } + + /** + * Creates a new tuple and assigns the given values to the tuple's fields. This is more + * convenient than using the constructor, because the compiler can infer the generic type + * arguments implicitly. For example: {@code Tuple3.of(n, x, s)} instead of {@code new + * Tuple3<Integer, Double, String>(n, x, s)} + */ + public static <T0, T1, T2> Tuple3<T0, T1, T2> of(T0 f0, T1 f1, T2 f2) { + return new Tuple3<>(f0, f1, f2); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index 57f6b405628..a957ee8f8a8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -55,6 +55,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ParquetUtils; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.common.util.collection.Tuple3; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieMetadataException; @@ -70,6 +71,7 @@ import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.conf.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -98,8 +100,6 @@ import java.util.stream.Collector; import java.util.stream.Collectors; import java.util.stream.Stream; -import scala.Tuple3; - import static org.apache.hudi.avro.AvroSchemaUtils.resolveNullableSchema; import static org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields; import static org.apache.hudi.avro.HoodieAvroUtils.convertValueForSpecificDataTypes; @@ -799,9 +799,9 @@ public class HoodieTableMetadataUtil { // Create records MDT int parallelism = Math.max(Math.min(partitionFileFlagTupleList.size(), recordsGenerationParams.getBloomIndexParallelism()), 1); return engineContext.parallelize(partitionFileFlagTupleList, parallelism).flatMap(partitionFileFlagTuple -> { - final String partitionName = partitionFileFlagTuple._1(); - final String filename = partitionFileFlagTuple._2(); - final boolean isDeleted = partitionFileFlagTuple._3(); + final String partitionName = partitionFileFlagTuple.f0; + final String filename = partitionFileFlagTuple.f1; + final boolean isDeleted = partitionFileFlagTuple.f2; if (!FSUtils.isBaseFile(new Path(filename))) { LOG.warn(String.format("Ignoring file %s as it is not a base file", filename)); return Stream.<HoodieRecord>empty().iterator(); @@ -823,7 +823,7 @@ public class HoodieTableMetadataUtil { final String partition = getPartitionIdentifier(partitionName); return Stream.<HoodieRecord>of(HoodieMetadataPayload.createBloomFilterMetadataRecord( - partition, filename, instantTime, recordsGenerationParams.getBloomFilterType(), bloomFilterBuffer, partitionFileFlagTuple._3())) + partition, filename, instantTime, recordsGenerationParams.getBloomFilterType(), bloomFilterBuffer, partitionFileFlagTuple.f2)) .iterator(); }); } @@ -853,9 +853,9 @@ public class HoodieTableMetadataUtil { // Create records MDT int parallelism = Math.max(Math.min(partitionFileFlagTupleList.size(), recordsGenerationParams.getColumnStatsIndexParallelism()), 1); return engineContext.parallelize(partitionFileFlagTupleList, parallelism).flatMap(partitionFileFlagTuple -> { - final String partitionName = partitionFileFlagTuple._1(); - final String filename = partitionFileFlagTuple._2(); - final boolean isDeleted = partitionFileFlagTuple._3(); + final String partitionName = partitionFileFlagTuple.f0; + final String filename = partitionFileFlagTuple.f1; + final boolean isDeleted = partitionFileFlagTuple.f2; if (!FSUtils.isBaseFile(new Path(filename)) || !filename.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { LOG.warn(String.format("Ignoring file %s as it is not a PARQUET file", filename)); return Stream.<HoodieRecord>empty().iterator(); @@ -884,10 +884,10 @@ public class HoodieTableMetadataUtil { + partitionToAppendedFiles.values().stream().mapToInt(Map::size).sum(); final List<Tuple3<String, String, Boolean>> partitionFileFlagTupleList = new ArrayList<>(totalFiles); partitionToDeletedFiles.entrySet().stream() - .flatMap(entry -> entry.getValue().stream().map(deletedFile -> new Tuple3<>(entry.getKey(), deletedFile, true))) + .flatMap(entry -> entry.getValue().stream().map(deletedFile -> Tuple3.of(entry.getKey(), deletedFile, true))) .collect(Collectors.toCollection(() -> partitionFileFlagTupleList)); partitionToAppendedFiles.entrySet().stream() - .flatMap(entry -> entry.getValue().keySet().stream().map(addedFile -> new Tuple3<>(entry.getKey(), addedFile, false))) + .flatMap(entry -> entry.getValue().keySet().stream().map(addedFile -> Tuple3.of(entry.getKey(), addedFile, false))) .collect(Collectors.toCollection(() -> partitionFileFlagTupleList)); return partitionFileFlagTupleList; } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/stats/ColumnStatsIndices.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/stats/ColumnStatsIndices.java index 48d5c9d2fa4..05931876603 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/stats/ColumnStatsIndices.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/stats/ColumnStatsIndices.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.common.util.collection.Tuple3; import org.apache.hudi.common.util.hash.ColumnIndexID; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.metadata.HoodieMetadataPayload; @@ -313,22 +314,6 @@ public class ColumnStatsIndices { // ------------------------------------------------------------------------- // Utilities // ------------------------------------------------------------------------- - private static class Tuple3 { - public Object f0; - public Object f1; - public Object f2; - - private Tuple3(Object f0, Object f1, Object f2) { - this.f0 = f0; - this.f1 = f1; - this.f2 = f2; - } - - public static Tuple3 of(Object f0, Object f1, Object f2) { - return new Tuple3(f0, f1, f2); - } - } - private static DataType getMetadataDataType() { return AvroSchemaConverter.convertToDataType(HoodieMetadataRecord.SCHEMA$); }