nsivabalan commented on code in PR #12376:
URL: https://github.com/apache/hudi/pull/12376#discussion_r1863881334


##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java:
##########
@@ -264,24 +264,18 @@ protected Map<String, 
HoodieRecord<HoodieMetadataPayload>> getRecordsByKeys(List
     final int numFileSlices = partitionFileSlices.size();
     checkState(numFileSlices > 0, "Number of file slices for partition " + 
partitionName + " should be > 0");
 
-    // Lookup keys from each file slice
-    if (numFileSlices == 1) {
-      // Optimization for a single slice for smaller metadata table partitions
-      result = lookupKeysFromFileSlice(partitionName, keys, 
partitionFileSlices.get(0));
-    } else {
-      // Parallel lookup for large sized partitions with many file slices
-      // Partition the keys by the file slice which contains it
-      ArrayList<ArrayList<String>> partitionedKeys = 
partitionKeysByFileSlices(keys, numFileSlices);
-      result = new HashMap<>(keys.size());
-      getEngineContext().setJobStatus(this.getClass().getSimpleName(), 
"Reading keys from metadata table partition " + partitionName);
-      getEngineContext().map(partitionedKeys, keysList -> {
-        if (keysList.isEmpty()) {
-          return Collections.<String, 
HoodieRecord<HoodieMetadataPayload>>emptyMap();
-        }
-        int shardIndex = 
HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(keysList.get(0), 
numFileSlices);
-        return lookupKeysFromFileSlice(partitionName, keysList, 
partitionFileSlices.get(shardIndex));
-      }, partitionedKeys.size()).forEach(result::putAll);
-    }
+    // Parallel lookup for large sized partitions with many file slices

Review Comment:
   I assume this change is just removing the `if` block. If you made any 
other changes, let me know and I can review them.



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java:
##########
@@ -311,24 +305,18 @@ public Map<String, 
List<HoodieRecord<HoodieMetadataPayload>>> getAllRecordsByKey
     final int numFileSlices = partitionFileSlices.size();
     checkState(numFileSlices > 0, "Number of file slices for partition " + 
partitionName + " should be > 0");
 
-    // Lookup keys from each file slice
-    if (numFileSlices == 1) {
-      // Optimization for a single slice for smaller metadata table partitions
-      result = lookupAllKeysFromFileSlice(partitionName, keys, 
partitionFileSlices.get(0));
-    } else {
-      // Parallel lookup for large sized partitions with many file slices
-      // Partition the keys by the file slice which contains it
-      ArrayList<ArrayList<String>> partitionedKeys = 
partitionKeysByFileSlices(keys, numFileSlices);
-      result = new HashMap<>(keys.size());
-      getEngineContext().setJobStatus(this.getClass().getSimpleName(), 
"Reading keys from metadata table partition " + partitionName);
-      getEngineContext().map(partitionedKeys, keysList -> {
-        if (keysList.isEmpty()) {
-          return Collections.<String, 
HoodieRecord<HoodieMetadataPayload>>emptyMap();
-        }
-        int shardIndex = 
HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(keysList.get(0), 
numFileSlices);
-        return lookupAllKeysFromFileSlice(partitionName, keysList, 
partitionFileSlices.get(shardIndex));
-      }, partitionedKeys.size()).forEach(map -> result.putAll((Map<String, 
List<HoodieRecord<HoodieMetadataPayload>>>) map));
-    }
+    // Parallel lookup for large sized partitions with many file slices
+    // Partition the keys by the file slice which contains it
+    ArrayList<ArrayList<String>> partitionedKeys = 
partitionKeysByFileSlices(keys, numFileSlices);

Review Comment:
   Same comment as above.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to