This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 13a8e5c7297 [HUDI-5348] Cache file slices in HoodieBackedTableMetadata (#7436)
13a8e5c7297 is described below

commit 13a8e5c729750ba5907d75df3d22473feaaa2a03
Author: Y Ethan Guo <ethan.guoyi...@gmail.com>
AuthorDate: Mon Dec 12 17:00:10 2022 -0800

    [HUDI-5348] Cache file slices in HoodieBackedTableMetadata (#7436)
---
 .../org/apache/hudi/metadata/HoodieBackedTableMetadata.java | 13 +++++++++++--
 .../org/apache/hudi/metadata/HoodieTableMetadataUtil.java   | 10 ++++++----
 .../java/org/apache/hudi/utilities/TestHoodieIndexer.java   |  7 +++++--
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
index 7743a65bf05..e2fbc4e6716 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
@@ -40,6 +40,7 @@ import 
org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
+import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
 import org.apache.hudi.common.util.ClosableIterator;
 import org.apache.hudi.common.util.HoodieTimer;
 import org.apache.hudi.common.util.Option;
@@ -78,6 +79,7 @@ import static 
org.apache.hudi.common.util.ValidationUtils.checkArgument;
 import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS;
 import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS;
 import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_FILES;
+import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getFileSystemView;
 
 /**
  * Table metadata provided by an internal DFS backed Hudi metadata table.
@@ -92,6 +94,7 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
   // Metadata table's timeline and metaclient
   private HoodieTableMetaClient metadataMetaClient;
   private HoodieTableConfig metadataTableConfig;
+  private HoodieTableFileSystemView metadataFileSystemView;
   // should we reuse the open file handles, across calls
   private final boolean reuse;
 
@@ -120,6 +123,7 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
     } else if (this.metadataMetaClient == null) {
       try {
         this.metadataMetaClient = 
HoodieTableMetaClient.builder().setConf(hadoopConf.get()).setBasePath(metadataBasePath).build();
+        this.metadataFileSystemView = getFileSystemView(metadataMetaClient);
         this.metadataTableConfig = metadataMetaClient.getTableConfig();
         this.isBloomFilterIndexEnabled = 
metadataConfig.isBloomFilterIndexEnabled();
         this.isColumnStatsIndexEnabled = 
metadataConfig.isColumnStatsIndexEnabled();
@@ -127,11 +131,13 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
         LOG.warn("Metadata table was not found at path " + metadataBasePath);
         this.isMetadataTableEnabled = false;
         this.metadataMetaClient = null;
+        this.metadataFileSystemView = null;
         this.metadataTableConfig = null;
       } catch (Exception e) {
         LOG.error("Failed to initialize metadata table at path " + 
metadataBasePath, e);
         this.isMetadataTableEnabled = false;
         this.metadataMetaClient = null;
+        this.metadataFileSystemView = null;
         this.metadataTableConfig = null;
       }
     }
@@ -162,7 +168,8 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
     //       to scan all file-groups for all key-prefixes as each of these 
might contain some
     //       records matching the key-prefix
     List<FileSlice> partitionFileSlices =
-        
HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, 
partitionName);
+        HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(
+            metadataMetaClient, metadataFileSystemView, partitionName);
 
     return (shouldLoadInMemory ? HoodieListData.lazy(partitionFileSlices) : 
engineContext.parallelize(partitionFileSlices))
         .flatMap((SerializableFunction<FileSlice, 
Iterator<HoodieRecord<HoodieMetadataPayload>>>) fileSlice -> {
@@ -379,7 +386,8 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
   private Map<Pair<String, FileSlice>, List<String>> 
getPartitionFileSliceToKeysMapping(final String partitionName, final 
List<String> keys) {
     // Metadata is in sync till the latest completed instant on the dataset
     List<FileSlice> latestFileSlices =
-        
HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, 
partitionName);
+        HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(
+            metadataMetaClient, metadataFileSystemView, partitionName);
 
     Map<Pair<String, FileSlice>, List<String>> partitionFileSliceToKeysMap = 
new HashMap<>();
     for (String key : keys) {
@@ -646,6 +654,7 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
     dataMetaClient.reloadActiveTimeline();
     if (metadataMetaClient != null) {
       metadataMetaClient.reloadActiveTimeline();
+      metadataFileSystemView = getFileSystemView(metadataMetaClient);
     }
     // the cached reader has max instant time restriction, they should be 
cleared
     // because the metadata timeline may have changed.
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index 5896c1a5ebb..0ceb43b86c6 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -991,13 +991,15 @@ public class HoodieTableMetadataUtil {
    * just before the compaction instant time. The list of file slices returned 
is
    * sorted in the correct order of file group name.
    *
-   * @param metaClient - Instance of {@link HoodieTableMetaClient}.
-   * @param partition  - The name of the partition whose file groups are to be 
loaded.
+   * @param metaClient Instance of {@link HoodieTableMetaClient}.
+   * @param fsView     Metadata table filesystem view.
+   * @param partition  The name of the partition whose file groups are to be 
loaded.
    * @return List of latest file slices for all file groups in a given 
partition.
    */
-  public static List<FileSlice> 
getPartitionLatestMergedFileSlices(HoodieTableMetaClient metaClient, String 
partition) {
+  public static List<FileSlice> getPartitionLatestMergedFileSlices(
+      HoodieTableMetaClient metaClient, HoodieTableFileSystemView fsView, 
String partition) {
     LOG.info("Loading latest merged file slices for metadata table partition " 
+ partition);
-    return getPartitionFileSlices(metaClient, Option.empty(), partition, true);
+    return getPartitionFileSlices(metaClient, Option.of(fsView), partition, 
true);
   }
 
   /**
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java
index f5a0fadc87f..ac7b86f4cfa 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java
@@ -56,6 +56,7 @@ import java.util.stream.Stream;
 
 import static org.apache.hudi.common.table.HoodieTableMetaClient.reload;
 import static 
org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED;
+import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.getFileSystemView;
 import static 
org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;
 import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;
 import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS;
@@ -175,7 +176,8 @@ public class TestHoodieIndexer extends 
SparkClientFunctionalTestHarness implemen
 
     HoodieTableMetaClient metadataMetaClient = 
HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(metaClient.getMetaPath()
 + "/metadata").build();
     List<FileSlice> partitionFileSlices =
-        
HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, 
COLUMN_STATS.getPartitionPath());
+        HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(
+            metadataMetaClient, getFileSystemView(metadataMetaClient), 
COLUMN_STATS.getPartitionPath());
     assertEquals(partitionFileSlices.size(), colStatsFileGroupCount);
   }
 
@@ -220,7 +222,8 @@ public class TestHoodieIndexer extends 
SparkClientFunctionalTestHarness implemen
 
     HoodieTableMetaClient metadataMetaClient = 
HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(metaClient.getMetaPath()
 + "/metadata").build();
     List<FileSlice> partitionFileSlices =
-        
HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, 
COLUMN_STATS.getPartitionPath());
+        HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(
+            metadataMetaClient, getFileSystemView(metadataMetaClient), 
COLUMN_STATS.getPartitionPath());
     assertEquals(partitionFileSlices.size(), 
HoodieMetadataConfig.METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT.defaultValue());
   }
 

Reply via email to