This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new a36f9709e5 [hudi] Fix Hudi clone to pass configuration for Hudi extractFiles (#5651)
a36f9709e5 is described below

commit a36f9709e5b968ce916ccdd362f7b3e3688b5f1f
Author: timmyyao <[email protected]>
AuthorDate: Thu May 22 20:24:39 2025 +0800

    [hudi] Fix Hudi clone to pass configuration for Hudi extractFiles (#5651)
---
 .../paimon/hive/clone/HiveCloneExtractor.java       |  1 +
 .../apache/paimon/hive/clone/HiveCloneUtils.java    |  1 +
 .../paimon/hive/clone/HiveTableCloneExtractor.java  |  4 ++++
 .../java/org/apache/paimon/hudi/HudiFileIndex.java  | 11 ++++++++---
 .../apache/paimon/hudi/HudiHiveCloneExtractor.java  | 21 +++++++++++++++++----
 5 files changed, 31 insertions(+), 7 deletions(-)
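
Note for context: the fix threads the catalog options (a Map<String, String>) through HiveCloneExtractor#extractFiles and into HudiFileIndex, so that catalog-level settings reach the Hadoop Configuration that Hudi uses to list files. The snippet below is a minimal sketch of that flow, assuming hadoop-common on the classpath; the class name, method shapes, and the fs.defaultFS key are illustrative stand-ins, not the actual Paimon signatures.

    import org.apache.hadoop.conf.Configuration;

    import java.util.Map;

    public class CloneConfigFlowSketch {

        // Simplified stand-in for the new extractFiles signature: the catalog
        // options map is now the first parameter and is forwarded onward.
        static Configuration extractFiles(Map<String, String> configuration) {
            return buildHadoopConf(configuration);
        }

        // Mirrors the new line in HudiFileIndex's constructor: every catalog
        // option is copied onto the Hadoop conf before Hudi lists files.
        static Configuration buildHadoopConf(Map<String, String> configuration) {
            Configuration hadoopConf = new Configuration(false);
            configuration.forEach(hadoopConf::set);
            return hadoopConf;
        }

        public static void main(String[] args) {
            Configuration conf = extractFiles(Map.of("fs.defaultFS", "file:///"));
            System.out.println(conf.get("fs.defaultFS")); // prints file:///
        }
    }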

diff --git a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneExtractor.java b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneExtractor.java
index a223db56e3..111924f720 100644
--- a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneExtractor.java
+++ b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneExtractor.java
@@ -43,6 +43,7 @@ public interface HiveCloneExtractor {
             throws Exception;
 
     List<HivePartitionFiles> extractFiles(
+            Map<String, String> configuration,
             IMetaStoreClient client,
             Table table,
             FileIO fileIO,
diff --git a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneUtils.java b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneUtils.java
index e1326ede41..d3c59f0885 100644
--- a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneUtils.java
+++ b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveCloneUtils.java
@@ -135,6 +135,7 @@ public class HiveCloneUtils {
                 client.getTable(identifier.getDatabaseName(), identifier.getTableName());
         return HiveCloneExtractor.getExtractor(sourceTable)
                 .extractFiles(
+                        hiveCatalog.options(),
                         hiveCatalog.getHmsClient(),
                         sourceTable,
                         hiveCatalog.fileIO(),
diff --git a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveTableCloneExtractor.java b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveTableCloneExtractor.java
index 0b503b0086..7ad4c37934 100644
--- a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveTableCloneExtractor.java
+++ b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/clone/HiveTableCloneExtractor.java
@@ -33,6 +33,8 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
 
@@ -54,6 +56,7 @@ import static org.apache.paimon.hive.clone.HiveCloneUtils.parseFormat;
 /** A {@link HiveCloneExtractor} for hive tables. */
 public class HiveTableCloneExtractor implements HiveCloneExtractor {
 
+    private static final Logger LOG = LoggerFactory.getLogger(HiveTableCloneExtractor.class);
     public static final HiveTableCloneExtractor INSTANCE = new HiveTableCloneExtractor();
 
     @Override
@@ -70,6 +73,7 @@ public class HiveTableCloneExtractor implements HiveCloneExtractor {
 
     @Override
     public List<HivePartitionFiles> extractFiles(
+            Map<String, String> configuration,
             IMetaStoreClient client,
             Table table,
             FileIO fileIO,
diff --git a/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiFileIndex.java b/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiFileIndex.java
index de0c93b50c..6a6fd83ad3 100644
--- a/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiFileIndex.java
+++ b/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiFileIndex.java
@@ -43,6 +43,8 @@ import org.apache.hudi.storage.StoragePath;
 import org.apache.hudi.storage.StoragePathInfo;
 import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;
 import org.apache.hudi.util.StreamerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
 
@@ -64,6 +66,7 @@ import static org.apache.paimon.utils.Preconditions.checkState;
  * @see org.apache.hudi.source.FileIndex
  */
 public class HudiFileIndex {
+    private static final Logger LOG = LoggerFactory.getLogger(HudiFileIndex.class);
 
     private final Path path;
     private final HoodieMetadataConfig metadataConfig;
@@ -78,14 +81,16 @@ public class HudiFileIndex {
 
     public HudiFileIndex(
             String location,
-            Map<String, String> conf,
+            Map<String, String> tableOptions,
+            Map<String, String> configuration,
             RowType partitionType,
             @Nullable PartitionPredicate partitionPredicate) {
         this.path = new Path(location);
-        this.metadataConfig = metadataConfig(conf);
+        this.metadataConfig = metadataConfig(tableOptions);
         Configuration hadoopConf =
                 HadoopConfigurations.getHadoopConf(
-                        org.apache.flink.configuration.Configuration.fromMap(conf));
+                        org.apache.flink.configuration.Configuration.fromMap(configuration));
+        configuration.forEach(hadoopConf::set);
         this.partitionType = partitionType;
         this.partitionPredicate = partitionPredicate;
         this.engineContext = new HoodieFlinkEngineContext(hadoopConf);
diff --git a/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiHiveCloneExtractor.java b/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiHiveCloneExtractor.java
index b83a493b18..c00e57cd7e 100644
--- a/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiHiveCloneExtractor.java
+++ b/paimon-hudi/src/main/java/org/apache/paimon/hudi/HudiHiveCloneExtractor.java
@@ -31,6 +31,8 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieTableType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
 
@@ -45,6 +47,7 @@ import static org.apache.paimon.utils.Preconditions.checkArgument;
 
 /** A {@link HiveCloneExtractor} for Hudi tables. */
 public class HudiHiveCloneExtractor extends HiveTableCloneExtractor {
+    private static final Logger LOG = LoggerFactory.getLogger(HudiHiveCloneExtractor.class);
 
     @Override
     public boolean matches(Table table) {
@@ -62,13 +65,22 @@ public class HudiHiveCloneExtractor extends HiveTableCloneExtractor {
                 Arrays.stream(HoodieRecord.HoodieMetadataField.values())
                         .map(HoodieRecord.HoodieMetadataField::getFieldName)
                         .collect(Collectors.toSet());
-        return fields.stream()
-                .filter(f -> !hudiMetadataFields.contains(f.getName()))
-                .collect(Collectors.toList());
+        List<FieldSchema> resultFields =
+                fields.stream()
+                        .filter(f -> !hudiMetadataFields.contains(f.getName()))
+                        .collect(Collectors.toList());
+        LOG.info(
+                "Hudi table {}.{} with total field count {}, and result field 
count {} after filter",
+                database,
+                table,
+                fields.size(),
+                resultFields.size());
+        return resultFields;
     }
 
     @Override
     public List<HivePartitionFiles> extractFiles(
+            Map<String, String> configuration,
             IMetaStoreClient client,
             Table table,
             FileIO fileIO,
@@ -80,7 +92,8 @@ public class HudiHiveCloneExtractor extends HiveTableCloneExtractor {
         checkTableType(options);
 
         String location = table.getSd().getLocation();
-        HudiFileIndex fileIndex = new HudiFileIndex(location, options, partitionRowType, predicate);
+        HudiFileIndex fileIndex =
+                new HudiFileIndex(location, options, configuration, partitionRowType, predicate);
 
         if (fileIndex.isPartitioned()) {
             return fileIndex.getAllFilteredPartitionFiles(fileIO);
