Repository: hive

Updated Branches:
  refs/heads/hive-14535 f883d67e8 -> e76e8d0ad
HIVE-15870 : MM tables - parquet_join test fails (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e76e8d0a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e76e8d0a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e76e8d0a

Branch: refs/heads/hive-14535
Commit: e76e8d0ad60147179b29bbd5a39b0bac5e524065
Parents: f883d67
Author: Sergey Shelukhin <ser...@apache.org>
Authored: Wed Mar 1 18:56:02 2017 -0800
Committer: Sergey Shelukhin <ser...@apache.org>
Committed: Wed Mar 1 18:56:02 2017 -0800

----------------------------------------------------------------------
 .../io/encoded/VectorDeserializeOrcWriter.java  |  3 +-
 .../hadoop/hive/ql/exec/tez/SplitGrouper.java   |  4 +-
 .../ql/exec/vector/VectorizedRowBatchCtx.java   |  2 +-
 .../hive/ql/index/HiveIndexedInputFormat.java   |  2 +-
 .../hive/ql/io/CombineHiveInputFormat.java      |  8 +-
 .../hive/ql/io/CombineHiveRecordReader.java     |  2 +-
 .../ql/io/HiveContextAwareRecordReader.java     |  2 +-
 .../hadoop/hive/ql/io/HiveFileFormatUtils.java  | 36 ++++-----
 .../hadoop/hive/ql/io/HiveInputFormat.java      |  2 +-
 .../hive/ql/io/parquet/ProjectionPusher.java    | 24 +++---
 .../hive/ql/optimizer/physical/Vectorizer.java  |  1 +
 .../hive/ql/io/TestHiveFileFormatUtils.java     | 24 +++---
 ql/src/test/queries/clientpositive/mm_all.q     | 16 +++-
 .../results/clientpositive/llap/mm_all.q.out    | 72 ++++++++++++++++++
 ql/src/test/results/clientpositive/mm_all.q.out | 80 +++++++++++++++++++-
 15 files changed, 217 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java
index c9df7d9..da64c98 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java
@@ -92,8 +92,7 @@ class VectorDeserializeOrcWriter extends EncodingWriter implements Runnable {
       return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
     }
     Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
-    PartitionDesc partDesc = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
-        parts, path, null);
+    PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
     if (partDesc == null) {
       LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: no partition desc for " + path);
       return new DeserializerOrcWriter(serDe, sourceOi, allocSize);

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
index 5c0c3ed..92a1ebc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
@@ -275,7 +275,7 @@ public class SplitGrouper {
       MapWork work) throws IOException {
     boolean retval = false;
     Path path = ((FileSplit) s).getPath();
-    PartitionDesc pd = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    PartitionDesc pd = HiveFileFormatUtils.getFromPathRecursively(
         work.getPathToPartitionInfo(), path, cache);
     String currentDeserializerClass = pd.getDeserializerClassName();
     Class<?> currentInputFormatClass = pd.getInputFileFormatClass();
@@ -288,7 +288,7 @@ public class SplitGrouper {
       return !path.equals(prevPath);
     }
     PartitionDesc prevPD =
-        HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(),
+        HiveFileFormatUtils.getFromPathRecursively(work.getPathToPartitionInfo(),
             prevPath, cache);
     previousDeserializerClass = prevPD.getDeserializerClassName();
     previousInputFormatClass = prevPD.getInputFileFormatClass();


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index e546a65..cfa4e2c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -150,7 +150,7 @@ public class VectorizedRowBatchCtx {
         .getMapWork(hiveConf).getPathToPartitionInfo();
     PartitionDesc partDesc = HiveFileFormatUtils
-        .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+        .getFromPathRecursively(pathToPartitionInfo,
             split.getPath(), IOPrepareCache.get().getPartitionDescMap());
     getPartitionValues(vrbCtx, partDesc, partitionValues);


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
index 0e6ec84..1d77da6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
@@ -75,7 +75,7 @@ public class HiveIndexedInputFormat extends HiveInputFormat {
     PartitionDesc part;
     for (Path dir : dirs) {
       part = HiveFileFormatUtils
-          .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
+          .getFromPathRecursively(pathToPartitionInfo, dir,
               IOPrepareCache.get().allocatePartitionDescMap(), true);
       // create a new InputFormat instance if this is the first time to see this
       // class

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
index 86397af..d0ddad4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
@@ -102,7 +102,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
     public Set<Integer> call() throws Exception {
       Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
       for (int i = 0; i < length; i++) {
-        PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+        PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
             pathToPartitionInfo, paths[i + start],
             IOPrepareCache.get().allocatePartitionDescMap());
         // Use HiveInputFormat if any of the paths is not splittable
@@ -170,7 +170,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
       Path[] ipaths = inputSplitShim.getPaths();
       if (ipaths.length > 0) {
         PartitionDesc part = HiveFileFormatUtils
-            .getPartitionDescFromPathRecursively(this.pathToPartitionInfo,
+            .getFromPathRecursively(this.pathToPartitionInfo,
                 ipaths[0], IOPrepareCache.get().getPartitionDescMap());
         inputFormatClassName = part.getInputFileFormatClass().getName();
       }
@@ -284,7 +284,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
       // extract all the inputFormatClass names for each chunk in the
       // CombinedSplit.
-      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(pathToPartitionInfo,
+      PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo,
           inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());
 
       // create a new InputFormat instance if this is the first time to see
@@ -364,7 +364,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
     Set<Path> poolSet = new HashSet<Path>();
 
     for (Path path : paths) {
-      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+      PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
          pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
      TableDesc tableDesc = part.getTableDesc();
      if ((tableDesc != null) && tableDesc.isNonNative()) {


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
index de36f2b..6911fb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
@@ -114,7 +114,7 @@ public class CombineHiveRecordReader<K extends WritableComparable, V extends Wri
     PartitionDesc part = null;
     Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
     for (Path path : hsplit.getPaths()) {
-      PartitionDesc otherPart = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+      PartitionDesc otherPart = HiveFileFormatUtils.getFromPathRecursively(
          pathToPartInfo, path, cache);
      LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
      if (part == null) {
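
The cacheMap argument threaded through these call sites memoizes the scheme-stripped copy
of the lookup map, so repeated lookups against the same map (as in the per-split loop of
CombineHiveRecordReader above) only pay the stripping cost once. A minimal sketch of that
pattern, assuming hive-exec and hadoop-common on the classpath; the class name, helper, and
paths are made up for illustration:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

public class PartDescLookupSketch {
  // Resolves a PartitionDesc for every split path, sharing one local cache so
  // the scheme-stripped copy of pathToPartInfo is built at most once.
  static void resolveAll(Map<Path, PartitionDesc> pathToPartInfo, Path... paths)
      throws IOException {
    Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
    for (Path path : paths) {
      // Falls back to the cached scheme-stripped map when the exact lookup misses.
      PartitionDesc part =
          HiveFileFormatUtils.getFromPathRecursively(pathToPartInfo, path, cache);
      System.out.println(path + " -> " + part);
    }
  }
}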

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
index 46f9970..4bc60dc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
@@ -326,7 +326,7 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader
             .getMapWork(jobConf).getPathToPartitionInfo();
       }
       part = HiveFileFormatUtils
-          .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+          .getFromPathRecursively(pathToPartitionInfo,
              filePath, IOPrepareCache.get().getPartitionDescMap());
    } catch (AssertionError ae) {
      LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath()


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
index 7727114..1103170 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
@@ -350,39 +350,33 @@ public final class HiveFileFormatUtils {
         .finalDestination(conf.getDestPath()));
   }
 
-  public static PartitionDesc getPartitionDescFromPathRecursively(
-      Map<Path, PartitionDesc> pathToPartitionInfo, Path dir,
-      Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cacheMap)
-      throws IOException {
-    return getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
-        cacheMap, false);
+  public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir,
+      Map<Map<Path, T>, Map<Path, T>> cacheMap) throws IOException {
+    return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, false);
   }
 
-  public static PartitionDesc getPartitionDescFromPathRecursively(
-      Map<Path, PartitionDesc> pathToPartitionInfo, Path dir,
-      Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cacheMap, boolean ignoreSchema)
-      throws IOException {
-
-    PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir);
+  public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir,
+      Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema) throws IOException {
+    T part = getFromPath(pathToPartitionInfo, dir);
     if (part == null
         && (ignoreSchema
             || (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim().equals(""))
             || FileUtils.pathsContainNoScheme(pathToPartitionInfo.keySet()))) {
-      Map<Path, PartitionDesc> newPathToPartitionInfo = null;
+      Map<Path, T> newPathToPartitionInfo = null;
       if (cacheMap != null) {
         newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
       }
       if (newPathToPartitionInfo == null) { // still null
-        newPathToPartitionInfo = populateNewPartitionDesc(pathToPartitionInfo);
+        newPathToPartitionInfo = populateNewT(pathToPartitionInfo);
         if (cacheMap != null) {
           cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
         }
       }
-      part = doGetPartitionDescFromPath(newPathToPartitionInfo, dir);
+      part = getFromPath(newPathToPartitionInfo, dir);
     }
     if (part != null) {
       return part;
@@ -392,18 +386,18 @@ public final class HiveFileFormatUtils {
     }
   }
 
-  private static Map<Path, PartitionDesc> populateNewPartitionDesc(Map<Path, PartitionDesc> pathToPartitionInfo) {
-    Map<Path, PartitionDesc> newPathToPartitionInfo = new HashMap<>();
-    for (Map.Entry<Path, PartitionDesc> entry: pathToPartitionInfo.entrySet()) {
-      PartitionDesc partDesc = entry.getValue();
+  private static <T> Map<Path, T> populateNewT(Map<Path, T> pathToPartitionInfo) {
+    Map<Path, T> newPathToPartitionInfo = new HashMap<>();
+    for (Map.Entry<Path, T> entry: pathToPartitionInfo.entrySet()) {
+      T partDesc = entry.getValue();
       Path pathOnly = Path.getPathWithoutSchemeAndAuthority(entry.getKey());
       newPathToPartitionInfo.put(pathOnly, partDesc);
     }
     return newPathToPartitionInfo;
  }
 
-  private static PartitionDesc doGetPartitionDescFromPath(
-      Map<Path, PartitionDesc> pathToPartitionInfo, Path dir) {
+  private static <T> T getFromPath(
+      Map<Path, T> pathToPartitionInfo, Path dir) {
     // We first do exact match, and then do prefix matching. The latter is due to input dir
     // could be /dir/ds='2001-02-21'/part-03 where part-03 is not part of partition
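
The change above is the enabling refactor: getPartitionDescFromPathRecursively becomes the
generic getFromPathRecursively, so the exact-match-then-scheme-stripped lookup works for any
map value type, not just PartitionDesc. A minimal usage sketch mirroring the first group in
TestHiveFileFormatUtils, with a String payload standing in for a PartitionDesc (assumes
hive-exec and hadoop-common on the classpath; the label value is made up):

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;

public class GenericLookupSketch {
  public static void main(String[] args) throws IOException {
    // The map is keyed by a path that carries a scheme, as MapWork path maps often are.
    Map<Path, String> pathToLabel = new HashMap<>();
    pathToLabel.put(new Path("file:///tbl/par1/part2/part3"), "part3-label");

    // The exact lookup misses (the query path has no scheme), so the method
    // retries against a scheme-stripped copy of the map and finds the entry.
    String label = HiveFileFormatUtils.getFromPathRecursively(
        pathToLabel, new Path("/tbl/par1/part2/part3"), null);
    System.out.println(label); // part3-label
  }
}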

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 921aa53..1d3485e 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -348,7 +348,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
     }
 
     boolean nonNative = false;
-    PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, hsplit.getPath(), null);
     LOG.debug("Found spec for " + hsplit.getPath() + " " + part + " from " + pathToPartitionInfo);


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
index 68407f5..e608932 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
@@ -83,17 +84,19 @@ public class ProjectionPusher {
     }
 
     final Set<String> aliases = new HashSet<String>();
-    final Iterator<Entry<Path, ArrayList<String>>> iterator =
-        mapWork.getPathToAliases().entrySet().iterator();
-
-    while (iterator.hasNext()) {
-      final Entry<Path, ArrayList<String>> entry = iterator.next();
-      final String key = entry.getKey().toUri().getPath();
-      if (splitPath.equals(key) || splitPathWithNoSchema.equals(key)) {
-        aliases.addAll(entry.getValue());
+    try {
+      ArrayList<String> a = HiveFileFormatUtils.getFromPathRecursively(
+          mapWork.getPathToAliases(), new Path(splitPath), null);
+      if (a != null) {
+        aliases.addAll(a);
       }
+      if (a == null || a.isEmpty()) {
+        // TODO: not having aliases for path usually means some bug. Should it give up?
+        LOG.warn("Couldn't find aliases for " + splitPath);
+      }
+    } catch (IllegalArgumentException | IOException e) {
+      throw new RuntimeException(e);
     }
-
     // Collect the needed columns from all the aliases and create ORed filter
     // expression for the table.
     boolean allColumnsNeeded = false;
@@ -181,7 +184,8 @@ public class ProjectionPusher {
       throws IOException {
     updateMrWork(jobConf);  // TODO: refactor this in HIVE-6366
     final JobConf cloneJobConf = new JobConf(jobConf);
-    final PartitionDesc part = pathToPartitionInfo.get(path);
+    final PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
+        pathToPartitionInfo, path, null);
     if ((part != null) && (part.getTableDesc() != null)) {
       Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index fadbc20..2a82206 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -908,6 +908,7 @@ public class Vectorizer implements PhysicalPlanResolver {
       setOperatorIssue("Alias " + alias + " not present in aliases " + aliases);
       return new ImmutablePair<Boolean,Boolean>(false, false);
     }
+    // TODO: should this use getPartitionDescFromPathRecursively?
     PartitionDesc partDesc = pathToPartitionInfo.get(path);
     if (partDesc.getVectorPartitionDesc() != null) {
       // We've seen this already.


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
index dae85a6..72910d0 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
@@ -46,19 +46,19 @@ public class TestHiveFileFormatUtils extends TestCase {
 
     // first group
     PartitionDesc ret = null;
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("file:///tbl/par1/part2/part3"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("file:///tbl/par1/part2/part3 not found.", partDesc_3, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("/tbl/par1/part2/part3"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("/tbl/par1/part2/part3 not found.", partDesc_3, ret);
 
     boolean exception = false;
     try {
-      ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+      ret = HiveFileFormatUtils.getFromPathRecursively(
          pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part3"),
          IOPrepareCache.get().allocatePartitionDescMap());
    } catch (IOException e) {
@@ -69,17 +69,17 @@
     exception = false;
 
     // second group
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("file:///tbl/par1/part2/part4"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("file:///tbl/par1/part2/part4 not found.", partDesc_4, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("/tbl/par1/part2/part4"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("/tbl/par1/part2/part4 not found.", partDesc_4, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part4"),
         IOPrepareCache.get().allocatePartitionDescMap());
@@ -87,24 +87,24 @@
     assertEquals("hdfs:///tbl/par1/part2/part4 not found.", partDesc_4,
         ret);
 
     // third group
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("file:///tbl/par1/part2/part5"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("file:///tbl/par1/part2/part5 not found.", partDesc_5, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("/tbl/par1/part2/part5"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("/tbl/par1/part2/part5 not found.", partDesc_5, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part5"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("hdfs:///tbl/par1/part2/part5 not found", partDesc_5, ret);
 
     // fourth group
     try {
-      ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+      ret = HiveFileFormatUtils.getFromPathRecursively(
          pathToPartitionInfo, new Path("file:///tbl/par1/part2/part6"),
          IOPrepareCache.get().allocatePartitionDescMap());
    } catch (IOException e) {
@@ -114,12 +114,12 @@
         exception);
     exception = false;
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartitionInfo, new Path("/tbl/par1/part2/part6"),
        IOPrepareCache.get().allocatePartitionDescMap());
    assertEquals("/tbl/par1/part2/part6 not found.", partDesc_6, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part6"),
        IOPrepareCache.get().allocatePartitionDescMap());
    assertEquals("hdfs:///tbl/par1/part2/part6 not found.", partDesc_6, ret);

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/test/queries/clientpositive/mm_all.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mm_all.q b/ql/src/test/queries/clientpositive/mm_all.q
index db16920..a79d917 100644
--- a/ql/src/test/queries/clientpositive/mm_all.q
+++ b/ql/src/test/queries/clientpositive/mm_all.q
@@ -448,7 +448,21 @@ drop table skewjoin_mm;
 
 set hive.optimize.skewjoin=false;
 
-
+set hive.optimize.index.filter=true;
+set hive.auto.convert.join=false;
+CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only");
+INSERT INTO parquet1_mm VALUES(1), (2);
+CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only");
+INSERT INTO parquet2_mm VALUES(1, 'value1');
+INSERT INTO parquet2_mm VALUES(1, 'value2');
+select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2';
+drop table parquet1_mm;
+drop table parquet2_mm;
+
+set hive.auto.convert.join=true;
 
 drop table intermediate;

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/test/results/clientpositive/llap/mm_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out
index 4bb480a..062d60f 100644
--- a/ql/src/test/results/clientpositive/llap/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out
@@ -3159,6 +3159,78 @@ POSTHOOK: query: drop table skewjoin_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@skewjoin_mm
 POSTHOOK: Output: default@skewjoin_mm
+PREHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet1_mm
+PREHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet1_mm
+POSTHOOK: Lineage: parquet1_mm.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet2_mm
+PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet2_mm
+POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet2_mm
+POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet1_mm
+PREHOOK: Input: default@parquet2_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet1_mm
+POSTHOOK: Input: default@parquet2_mm
+#### A masked pattern was here ####
+1	value1	value2
+PREHOOK: query: drop table parquet1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet1_mm
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: drop table parquet1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet1_mm
+POSTHOOK: Output: default@parquet1_mm
+PREHOOK: query: drop table parquet2_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet2_mm
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: drop table parquet2_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet2_mm
+POSTHOOK: Output: default@parquet2_mm
 PREHOOK: query: drop table intermediate
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@intermediate

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/test/results/clientpositive/mm_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mm_all.q.out b/ql/src/test/results/clientpositive/mm_all.q.out
index 26854bd..b418ea1 100644
--- a/ql/src/test/results/clientpositive/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/mm_all.q.out
@@ -1165,10 +1165,10 @@ POSTHOOK: Input: default@merge1_mm@key=103
 POSTHOOK: Input: default@merge1_mm@key=97
 POSTHOOK: Input: default@merge1_mm@key=98
 #### A masked pattern was here ####
-103	103
-100	100
 98	98
+103	103
 97	97
+100	100
 0	0
 10	10
 PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate
@@ -1217,16 +1217,16 @@ POSTHOOK: Input: default@merge1_mm@key=97
 POSTHOOK: Input: default@merge1_mm@key=98
 #### A masked pattern was here ####
 100	100
+100	100
 103	103
 97	97
-100	100
 103	103
 97	97
 98	98
 98	98
 0	0
-0	0
 10	10
+0	0
 10	10
 PREHOOK: query: drop table merge1_mm
 PREHOOK: type: DROPTABLE
@@ -3184,6 +3184,78 @@ POSTHOOK: query: drop table skewjoin_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@skewjoin_mm
 POSTHOOK: Output: default@skewjoin_mm
+PREHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet1_mm
+PREHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet1_mm
+POSTHOOK: Lineage: parquet1_mm.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet2_mm
+PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet2_mm
+POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet2_mm
+POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet1_mm
+PREHOOK: Input: default@parquet2_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet1_mm
+POSTHOOK: Input: default@parquet2_mm
+#### A masked pattern was here ####
+1	value1	value2
+PREHOOK: query: drop table parquet1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet1_mm
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: drop table parquet1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet1_mm
+POSTHOOK: Output: default@parquet1_mm
+PREHOOK: query: drop table parquet2_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet2_mm
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: drop table parquet2_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet2_mm
+POSTHOOK: Output: default@parquet2_mm
 PREHOOK: query: drop table intermediate
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@intermediate