This is an automated email from the ASF dual-hosted git repository.

nsivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 173e76eb3ba8 [HUDI] Add input records caching to HoodieGlobalSimpleIndex (#18921)
173e76eb3ba8 is described below

commit 173e76eb3ba8660224e67b36d8e72f6e1184fdf5
Author: Lokesh Jain <[email protected]>
AuthorDate: Thu Jun 11 11:54:59 2026 +0530

    [HUDI] Add input records caching to HoodieGlobalSimpleIndex (#18921)
    
    Adding caching to HoodieGlobalSimpleIndex.tagLocationInternal
    
    Co-authored-by: Lokesh Jain <[email protected]>
    Co-authored-by: Claude Sonnet 4.6 <[email protected]>
---
 .../org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java    | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java
index 336f6376bcd0..b712b708a389 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java
@@ -68,13 +68,20 @@ public class HoodieGlobalSimpleIndex extends HoodieSimpleIndex {
   protected <R> HoodieData<HoodieRecord<R>> tagLocationInternal(
       HoodieData<HoodieRecord<R>> inputRecords, HoodieEngineContext context,
       HoodieTable hoodieTable) {
+    if (config.getSimpleIndexUseCaching()) {
+      inputRecords.persist(config.getSimpleIndexInputStorageLevel());
+    }
     List<Pair<String, HoodieBaseFile>> latestBaseFiles = 
getAllBaseFilesInTable(context, hoodieTable);
     HoodiePairData<String, HoodieRecordGlobalLocation> allKeysAndLocations =
         fetchRecordGlobalLocations(context, hoodieTable, latestBaseFiles);
     boolean mayContainDuplicateLookup = 
hoodieTable.getMetaClient().getTableType() == MERGE_ON_READ;
     boolean shouldUpdatePartitionPath = 
config.getGlobalSimpleIndexUpdatePartitionPath() && hoodieTable.isPartitioned();
-    return tagGlobalLocationBackToRecords(inputRecords, allKeysAndLocations,
+    HoodieData<HoodieRecord<R>> taggedRecords = 
tagGlobalLocationBackToRecords(inputRecords, allKeysAndLocations,
         mayContainDuplicateLookup, shouldUpdatePartitionPath, config, 
hoodieTable);
+    if (config.getSimpleIndexUseCaching()) {
+      inputRecords.unpersist();
+    }
+    return taggedRecords;
   }
 
   private HoodiePairData<String, HoodieRecordGlobalLocation> 
fetchRecordGlobalLocations(

Reply via email to