deniskuzZ commented on code in PR #5934:
URL: https://github.com/apache/hive/pull/5934#discussion_r2188343170


##########
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java:
##########
@@ -71,8 +70,8 @@ public class SplitGrouper {
 
   // TODO This needs to be looked at. Map of Map to Map... Made concurrent for 
now since split generation
   // can happen in parallel.
-  private static final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> 
cache =
-      new ConcurrentHashMap<>();
+  private final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache =

Review Comment:
   updated
   ````
   From c62bbba350e9168d5bab4e727f1f5e18ff8fc26d Mon Sep 17 00:00:00 2001
   From: Denys Kuzmenko <denisk...@gmail.com>
   Date: Sun, 6 Jul 2025 15:40:18 +0300
   Subject: [PATCH 1/1] test
   
   ---
    .../hadoop/hive/ql/exec/tez/SplitGrouper.java |  5 +--
    .../hive/ql/io/HiveFileFormatUtils.java       | 39 +++++++++----------
    2 files changed, 19 insertions(+), 25 deletions(-)
   
   diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
   index 2ce7ed88c8..f867c251a8 100644
   --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
   +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
   @@ -69,10 +69,7 @@ public class SplitGrouper {
    
      private static final Logger LOG = 
LoggerFactory.getLogger(SplitGrouper.class);
    
   -  // TODO This needs to be looked at. Map of Map to Map... Made concurrent 
for now since split generation
   -  // can happen in parallel.
   -  private static final Map<Map<Path, PartitionDesc>, Map<Path, 
PartitionDesc>> cache =
   -      new ConcurrentHashMap<>();
   +  private final Map<Path, Path> cache = new ConcurrentHashMap<>();
    
      private final TezMapredSplitsGrouper tezGrouper = new 
TezMapredSplitsGrouper();
    
   diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
   index 20fde98c26..644ce9a3fd 100644
   --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
   +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
   @@ -360,17 +360,17 @@ private static RecordUpdater getRecordUpdater(JobConf 
jc,
      }
    
      public static <T> T getFromPathRecursively(Map<Path, T> 
pathToPartitionInfo, Path dir,
   -      Map<Map<Path, T>, Map<Path, T>> cacheMap) throws IOException {
   +      Map<Path, Path> cacheMap) throws IOException {
        return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, 
false);
      }
    
      public static <T> T getFromPathRecursively(Map<Path, T> 
pathToPartitionInfo, Path dir,
   -      Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema) 
throws IOException {
   +      Map<Path, Path> cacheMap, boolean ignoreSchema) throws IOException {
        return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, 
ignoreSchema, false);
      }
    
      public static <T> T getFromPathRecursively(Map<Path, T> 
pathToPartitionInfo, Path dir,
   -      Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema, 
boolean ifPresent)
   +      Map<Path, Path> cacheMap, boolean ignoreSchema, boolean ifPresent)
              throws IOException {
        T part = getFromPath(pathToPartitionInfo, dir);
    
   @@ -379,18 +379,7 @@ public static <T> T getFromPathRecursively(Map<Path, T> 
pathToPartitionInfo, Pat
                || (dir.toUri().getScheme() == null || 
dir.toUri().getScheme().trim().equals(""))
                || 
FileUtils.pathsContainNoScheme(pathToPartitionInfo.keySet()))) {
    
   -      Map<Path, T> newPathToPartitionInfo = null;
   -      if (cacheMap != null) {
   -        newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
   -      }
   -
   -      if (newPathToPartitionInfo == null) { // still null
   -        newPathToPartitionInfo = populateNewT(pathToPartitionInfo);
   -
   -        if (cacheMap != null) {
   -          cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
   -        }
   -      }
   +      Map<Path, T> newPathToPartitionInfo = 
populateNewT(pathToPartitionInfo, cacheMap);
          part = getFromPath(newPathToPartitionInfo, dir);
        }
        if (part != null || ifPresent) {
   @@ -401,13 +390,21 @@ public static <T> T getFromPathRecursively(Map<Path, 
T> pathToPartitionInfo, Pat
        }
      }
    
   -  private static <T> Map<Path, T> populateNewT(Map<Path, T> 
pathToPartitionInfo) {
   -    Map<Path, T> newPathToPartitionInfo = new HashMap<>();
   -    for (Map.Entry<Path, T> entry: pathToPartitionInfo.entrySet()) {
   -      T partDesc = entry.getValue();
   -      Path pathOnly = Path.getPathWithoutSchemeAndAuthority(entry.getKey());
   +  private static <T> Map<Path, T> populateNewT(Map<Path, T> 
pathToPartitionInfo,
   +      Map<Path, Path> cacheMap) {
   +    Map<Path, T> newPathToPartitionInfo = new 
HashMap<>(pathToPartitionInfo.size());
   +
   +    pathToPartitionInfo.forEach((originalPath, partDesc) -> {
   +      Path pathOnly = cacheMap != null ?
   +        cacheMap.get(originalPath) : null;
   +      if (pathOnly == null) {
   +        pathOnly = Path.getPathWithoutSchemeAndAuthority(originalPath);
   +        if (cacheMap != null) {
   +          cacheMap.put(originalPath, pathOnly);
   +        }
   +      }
          newPathToPartitionInfo.put(pathOnly, partDesc);
   -    }
   +    });
        return newPathToPartitionInfo;
      }
    
   -- 
   2.48.0
   ````
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to