deniskuzZ commented on code in PR #5934: URL: https://github.com/apache/hive/pull/5934#discussion_r2188221837
########## ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java: ########## @@ -71,8 +70,8 @@ public class SplitGrouper { // TODO This needs to be looked at. Map of Map to Map... Made concurrent for now since split generation // can happen in parallel. - private static final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = - new ConcurrentHashMap<>(); + private final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = Review Comment: @BsoBird, can we refactor with MultiMap? ```` Subject: [PATCH] refactor --- Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (revision ade51d810b383dd68874ecaa275c3a6cc9157884) +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (date 1751805169036) @@ -23,6 +23,7 @@ import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -32,6 +33,8 @@ import java.util.Properties; import java.util.Set; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.MapUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -72,6 +75,7 @@ import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Multimap; /** * An util class for various Hive file format tasks. @@ -360,17 +364,17 @@ } public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir, - Map<Map<Path, T>, Map<Path, T>> cacheMap) throws IOException { + Multimap<Map<Path, T>, Path> cacheMap) throws IOException { return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, false); } public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir, - Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema) throws IOException { + Multimap<Map<Path, T>, Path> cacheMap, boolean ignoreSchema) throws IOException { return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, ignoreSchema, false); } public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir, - Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema, boolean ifPresent) + Multimap<Map<Path, T>, Path> cacheMap, boolean ignoreSchema, boolean ifPresent) throws IOException { T part = getFromPath(pathToPartitionInfo, dir); @@ -381,14 +385,19 @@ Map<Path, T> newPathToPartitionInfo = null; if (cacheMap != null) { - newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo); + Collection<Path> keys = cacheMap.get(pathToPartitionInfo); + if (keys != null) { + newPathToPartitionInfo = + populateNewT(keys, pathToPartitionInfo.values()); + } } - if (newPathToPartitionInfo == null) { // still null - newPathToPartitionInfo = populateNewT(pathToPartitionInfo); + if (MapUtils.isEmpty(newPathToPartitionInfo)) { // still null + newPathToPartitionInfo = + populateNewT(pathToPartitionInfo.keySet(), pathToPartitionInfo.values()); if (cacheMap != null) { - cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo); + cacheMap.putAll(pathToPartitionInfo, newPathToPartitionInfo.keySet()); } } part = getFromPath(newPathToPartitionInfo, dir); @@ -401,12 +410,18 @@ } } - private static <T> Map<Path, T> populateNewT(Map<Path, T> pathToPartitionInfo) { + private static <T> Map<Path, T> populateNewT(Collection<Path> keys, Collection<T> values) { + if (keys.size() != values.size()) { + throw new IllegalArgumentException("Key and value collections must have the same size."); + } Map<Path, T> newPathToPartitionInfo = new HashMap<>(); - for (Map.Entry<Path, T> entry: pathToPartitionInfo.entrySet()) { - T partDesc = entry.getValue(); - Path pathOnly = Path.getPathWithoutSchemeAndAuthority(entry.getKey()); - newPathToPartitionInfo.put(pathOnly, partDesc); + Iterator<Path> keyIterator = keys.iterator(); + Iterator<T> valueIterator = values.iterator(); + + while (keyIterator.hasNext() && valueIterator.hasNext()) { + Path key = Path.getPathWithoutSchemeAndAuthority( + keyIterator.next()); + newPathToPartitionInfo.put(key, valueIterator.next()); } return newPathToPartitionInfo; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java (revision ade51d810b383dd68874ecaa275c3a6cc9157884) +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java (date 1751800901858) @@ -71,8 +71,8 @@ // TODO This needs to be looked at. Map of Map to Map... Made concurrent for now since split generation // can happen in parallel. - private static final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = - new ConcurrentHashMap<>(); + private final Multimap<Map<Path, PartitionDesc>, Path> cache = + ArrayListMultimap.create(); private final TezMapredSplitsGrouper tezGrouper = new TezMapredSplitsGrouper(); ```` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org