This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new a36f9709e5 [hudi] Fix Hudi clone to pass configuration for Hudi
extractFiles (#5651)
a36f9709e5 is described below
commit a36f9709e5b968ce916ccdd362f7b3e3688b5f1f
Author: timmyyao <[email protected]>
AuthorDate: Thu May 22 20:24:39 2025 +0800
[hudi] Fix Hudi clone to pass configuration for Hudi extractFiles (#5651)
---
.../paimon/hive/clone/HiveCloneExtractor.java | 1 +
.../apache/paimon/hive/clone/HiveCloneUtils.java | 1 +
.../paimon/hive/clone/HiveTableCloneExtractor.java | 4 ++++
.../java/org/apache/paimon/hudi/HudiFileIndex.java | 11 ++++++++---
.../apache/paimon/hudi/HudiHiveCloneExtractor.java | 21 +++++++++++++++++----
5 files changed, 31 insertions(+), 7 deletions(-)
diff --git
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneExtractor.java
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneExtractor.java
index a223db56e3..111924f720 100644
---
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneExtractor.java
+++
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneExtractor.java
@@ -43,6 +43,7 @@ public interface HiveCloneExtractor {
throws Exception;
List<HivePartitionFiles> extractFiles(
+ Map<String, String> configuration,
IMetaStoreClient client,
Table table,
FileIO fileIO,
diff --git
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneUtils.java
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneUtils.java
index e1326ede41..d3c59f0885 100644
---
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneUtils.java
+++
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneUtils.java
@@ -135,6 +135,7 @@ public class HiveCloneUtils {
client.getTable(identifier.getDatabaseName(),
identifier.getTableName());
return HiveCloneExtractor.getExtractor(sourceTable)
.extractFiles(
+ hiveCatalog.options(),
hiveCatalog.getHmsClient(),
sourceTable,
hiveCatalog.fileIO(),
diff --git
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveTableCloneExtractor.java
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveTableCloneExtractor.java
index 0b503b0086..7ad4c37934 100644
---
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveTableCloneExtractor.java
+++
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveTableCloneExtractor.java
@@ -33,6 +33,8 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
@@ -54,6 +56,7 @@ import static
org.apache.paimon.hive.clone.HiveCloneUtils.parseFormat;
/** A {@link HiveCloneExtractor} for hive tables. */
public class HiveTableCloneExtractor implements HiveCloneExtractor {
+ private static final Logger LOG =
LoggerFactory.getLogger(HiveTableCloneExtractor.class);
public static final HiveTableCloneExtractor INSTANCE = new
HiveTableCloneExtractor();
@Override
@@ -70,6 +73,7 @@ public class HiveTableCloneExtractor implements
HiveCloneExtractor {
@Override
public List<HivePartitionFiles> extractFiles(
+ Map<String, String> configuration,
IMetaStoreClient client,
Table table,
FileIO fileIO,
diff --git
a/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiFileIndex.java
b/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiFileIndex.java
index de0c93b50c..6a6fd83ad3 100644
--- a/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiFileIndex.java
+++ b/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiFileIndex.java
@@ -43,6 +43,8 @@ import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.StoragePathInfo;
import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;
import org.apache.hudi.util.StreamerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
@@ -64,6 +66,7 @@ import static
org.apache.paimon.utils.Preconditions.checkState;
* @see org.apache.hudi.source.FileIndex
*/
public class HudiFileIndex {
+ private static final Logger LOG =
LoggerFactory.getLogger(HudiFileIndex.class);
private final Path path;
private final HoodieMetadataConfig metadataConfig;
@@ -78,14 +81,16 @@ public class HudiFileIndex {
public HudiFileIndex(
String location,
- Map<String, String> conf,
+ Map<String, String> tableOptions,
+ Map<String, String> configuration,
RowType partitionType,
@Nullable PartitionPredicate partitionPredicate) {
this.path = new Path(location);
- this.metadataConfig = metadataConfig(conf);
+ this.metadataConfig = metadataConfig(tableOptions);
Configuration hadoopConf =
HadoopConfigurations.getHadoopConf(
-
org.apache.flink.configuration.Configuration.fromMap(conf));
+
org.apache.flink.configuration.Configuration.fromMap(configuration));
+ configuration.forEach(hadoopConf::set);
this.partitionType = partitionType;
this.partitionPredicate = partitionPredicate;
this.engineContext = new HoodieFlinkEngineContext(hadoopConf);
diff --git
a/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiHiveCloneExtractor.java
b/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiHiveCloneExtractor.java
index b83a493b18..c00e57cd7e 100644
---
a/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiHiveCloneExtractor.java
+++
b/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiHiveCloneExtractor.java
@@ -31,6 +31,8 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
@@ -45,6 +47,7 @@ import static
org.apache.paimon.utils.Preconditions.checkArgument;
/** A {@link HiveCloneExtractor} for Hudi tables. */
public class HudiHiveCloneExtractor extends HiveTableCloneExtractor {
+ private static final Logger LOG =
LoggerFactory.getLogger(HudiHiveCloneExtractor.class);
@Override
public boolean matches(Table table) {
@@ -62,13 +65,22 @@ public class HudiHiveCloneExtractor extends
HiveTableCloneExtractor {
Arrays.stream(HoodieRecord.HoodieMetadataField.values())
.map(HoodieRecord.HoodieMetadataField::getFieldName)
.collect(Collectors.toSet());
- return fields.stream()
- .filter(f -> !hudiMetadataFields.contains(f.getName()))
- .collect(Collectors.toList());
+ List<FieldSchema> resultFields =
+ fields.stream()
+ .filter(f -> !hudiMetadataFields.contains(f.getName()))
+ .collect(Collectors.toList());
+ LOG.info(
+ "Hudi table {}.{} with total field count {}, and result field
count {} after filter",
+ database,
+ table,
+ fields.size(),
+ resultFields.size());
+ return resultFields;
}
@Override
public List<HivePartitionFiles> extractFiles(
+ Map<String, String> configuration,
IMetaStoreClient client,
Table table,
FileIO fileIO,
@@ -80,7 +92,8 @@ public class HudiHiveCloneExtractor extends
HiveTableCloneExtractor {
checkTableType(options);
String location = table.getSd().getLocation();
- HudiFileIndex fileIndex = new HudiFileIndex(location, options,
partitionRowType, predicate);
+ HudiFileIndex fileIndex =
+ new HudiFileIndex(location, options, configuration,
partitionRowType, predicate);
if (fileIndex.isPartitioned()) {
return fileIndex.getAllFilteredPartitionFiles(fileIO);