codope commented on code in PR #11146: URL: https://github.com/apache/hudi/pull/11146#discussion_r1610397009
########## hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java: ########## @@ -783,4 +784,101 @@ public int getNumFileGroupsForPartition(MetadataPartitionType partition) { metadataFileSystemView, partition.getPartitionPath())); return partitionFileSliceMap.get(partition.getPartitionPath()).size(); } + + @Override + protected Map<String, String> getSecondaryKeysForRecordKeys(List<String> recordKeys, String partitionName) { + if (recordKeys.isEmpty()) { + return Collections.emptyMap(); + } + + // Load the file slices for the partition. Each file slice is a shard which saves a portion of the keys. + List<FileSlice> partitionFileSlices = partitionFileSliceMap.computeIfAbsent(partitionName, + k -> HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, metadataFileSystemView, partitionName)); + final int numFileSlices = partitionFileSlices.size(); + ValidationUtils.checkState(numFileSlices > 0, "Number of file slices for partition " + partitionName + " should be > 0"); + + // Lookup keys from each file slice + // TODO: parallelize this loop + Map<String, String> reverseSecondaryKeyMap = new HashMap<>(); + for (FileSlice partition : partitionFileSlices) { + reverseLookupSecondaryKeys(partitionName, recordKeys, partition, reverseSecondaryKeyMap); + } + + return reverseSecondaryKeyMap; + } + + private void reverseLookupSecondaryKeys(String partitionName, List<String> recordKeys, FileSlice fileSlice, Map<String, String> recordKeyMap) { + Set<String> keySet = new HashSet<>(recordKeys.size()); + Map<String, HoodieRecord<HoodieMetadataPayload>> logRecordsMap = new HashMap<>(); Review Comment: The IDE suggests some duplicate blocks in previously written code but none in the new code. However, I think you are talking about the few lines below where we get the readers and return early if they are null. I will refactor and reuse as much as possible. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org