deniskuzZ commented on code in PR #5934:
URL: https://github.com/apache/hive/pull/5934#discussion_r2188221837


##########
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java:
##########
@@ -71,8 +70,8 @@ public class SplitGrouper {
 
   // TODO This needs to be looked at. Map of Map to Map... Made concurrent for 
now since split generation
   // can happen in parallel.
-  private static final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> 
cache =
-      new ConcurrentHashMap<>();
+  private final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache =

Review Comment:
   @BsoBird, can we refactor with MultiMap?
   ````
   Subject: [PATCH] refactor
   ---
   Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
   IDEA additional info:
   Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
   <+>UTF-8
   ===================================================================
   diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
   --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java      
(revision ade51d810b383dd68874ecaa275c3a6cc9157884)
   +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java      
(date 1751805169036)
   @@ -23,6 +23,7 @@
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.ArrayList;
   +import java.util.Collection;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Iterator;
   @@ -32,6 +33,8 @@
    import java.util.Properties;
    import java.util.Set;
    
   +import org.apache.commons.collections.CollectionUtils;
   +import org.apache.commons.collections.MapUtils;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
   @@ -72,6 +75,7 @@
    import com.google.common.cache.Cache;
    import com.google.common.cache.CacheBuilder;
    import com.google.common.collect.ImmutableMap;
   +import com.google.common.collect.Multimap;
    
    /**
     * An util class for various Hive file format tasks.
   @@ -360,17 +364,17 @@
      }
    
      public static <T> T getFromPathRecursively(Map<Path, T> 
pathToPartitionInfo, Path dir,
   -      Map<Map<Path, T>, Map<Path, T>> cacheMap) throws IOException {
   +      Multimap<Map<Path, T>, Path> cacheMap) throws IOException {
        return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, 
false);
      }
    
      public static <T> T getFromPathRecursively(Map<Path, T> 
pathToPartitionInfo, Path dir,
   -      Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema) 
throws IOException {
   +      Multimap<Map<Path, T>, Path> cacheMap, boolean ignoreSchema) throws 
IOException {
        return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, 
ignoreSchema, false);
      }
    
      public static <T> T getFromPathRecursively(Map<Path, T> 
pathToPartitionInfo, Path dir,
   -      Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema, 
boolean ifPresent)
   +      Multimap<Map<Path, T>, Path> cacheMap, boolean ignoreSchema, boolean 
ifPresent)
              throws IOException {
        T part = getFromPath(pathToPartitionInfo, dir);
    
   @@ -381,14 +385,19 @@
    
          Map<Path, T> newPathToPartitionInfo = null;
          if (cacheMap != null) {
   -        newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
   +        Collection<Path> keys = cacheMap.get(pathToPartitionInfo);
   +        if (keys != null) {
   +          newPathToPartitionInfo =
   +              populateNewT(keys, pathToPartitionInfo.values());
   +        }
          }
    
   -      if (newPathToPartitionInfo == null) { // still null
   -        newPathToPartitionInfo = populateNewT(pathToPartitionInfo);
   +      if (MapUtils.isEmpty(newPathToPartitionInfo)) { // still null
   +        newPathToPartitionInfo =
   +            populateNewT(pathToPartitionInfo.keySet(), 
pathToPartitionInfo.values());
    
            if (cacheMap != null) {
   -          cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
   +          cacheMap.putAll(pathToPartitionInfo, 
newPathToPartitionInfo.keySet());
            }
          }
          part = getFromPath(newPathToPartitionInfo, dir);
   @@ -401,12 +410,18 @@
        }
      }
    
   -  private static <T> Map<Path, T> populateNewT(Map<Path, T> 
pathToPartitionInfo) {
   +  private static <T> Map<Path, T> populateNewT(Collection<Path> keys, 
Collection<T> values) {
   +    if (keys.size() != values.size()) {
   +      throw new IllegalArgumentException("Key and value collections must 
have the same size.");
   +    }
        Map<Path, T> newPathToPartitionInfo = new HashMap<>();
   -    for (Map.Entry<Path, T> entry: pathToPartitionInfo.entrySet()) {
   -      T partDesc = entry.getValue();
   -      Path pathOnly = Path.getPathWithoutSchemeAndAuthority(entry.getKey());
   -      newPathToPartitionInfo.put(pathOnly, partDesc);
   +    Iterator<Path> keyIterator = keys.iterator();
   +    Iterator<T> valueIterator = values.iterator();
   +
   +    while (keyIterator.hasNext() && valueIterator.hasNext()) {
   +      Path key = Path.getPathWithoutSchemeAndAuthority(
   +          keyIterator.next());
   +      newPathToPartitionInfo.put(key, valueIterator.next());
        }
        return newPathToPartitionInfo;
      }
   Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
   IDEA additional info:
   Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
   <+>UTF-8
   ===================================================================
   diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
   --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java       
(revision ade51d810b383dd68874ecaa275c3a6cc9157884)
   +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java       
(date 1751800901858)
   @@ -71,8 +71,8 @@
    
      // TODO This needs to be looked at. Map of Map to Map... Made concurrent 
for now since split generation
      // can happen in parallel.
   -  private static final Map<Map<Path, PartitionDesc>, Map<Path, 
PartitionDesc>> cache =
   -      new ConcurrentHashMap<>();
   +  private final Multimap<Map<Path, PartitionDesc>, Path> cache =
   +      ArrayListMultimap.create();
    
      private final TezMapredSplitsGrouper tezGrouper = new 
TezMapredSplitsGrouper();
    
   
   
   ````
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to