Repository: hive

Updated Branches:
  refs/heads/hive-14535 f883d67e8 -> e76e8d0ad
HIVE-15870 : MM tables - parquet_join test fails (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e76e8d0a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e76e8d0a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e76e8d0a

Branch: refs/heads/hive-14535
Commit: e76e8d0ad60147179b29bbd5a39b0bac5e524065
Parents: f883d67
Author: Sergey Shelukhin <ser...@apache.org>
Authored: Wed Mar 1 18:56:02 2017 -0800
Committer: Sergey Shelukhin <ser...@apache.org>
Committed: Wed Mar 1 18:56:02 2017 -0800

----------------------------------------------------------------------
 .../io/encoded/VectorDeserializeOrcWriter.java  |  3 +-
 .../hadoop/hive/ql/exec/tez/SplitGrouper.java   |  4 +-
 .../ql/exec/vector/VectorizedRowBatchCtx.java   |  2 +-
 .../hive/ql/index/HiveIndexedInputFormat.java   |  2 +-
 .../hive/ql/io/CombineHiveInputFormat.java      |  8 +-
 .../hive/ql/io/CombineHiveRecordReader.java     |  2 +-
 .../ql/io/HiveContextAwareRecordReader.java     |  2 +-
 .../hadoop/hive/ql/io/HiveFileFormatUtils.java  | 36 ++++-----
 .../hadoop/hive/ql/io/HiveInputFormat.java      |  2 +-
 .../hive/ql/io/parquet/ProjectionPusher.java    | 24 +++---
 .../hive/ql/optimizer/physical/Vectorizer.java  |  1 +
 .../hive/ql/io/TestHiveFileFormatUtils.java     | 24 +++---
 ql/src/test/queries/clientpositive/mm_all.q     | 16 +++-
 .../results/clientpositive/llap/mm_all.q.out    | 72 ++++++++++++++++++
 ql/src/test/results/clientpositive/mm_all.q.out | 80 +++++++++++++++++++-
 15 files changed, 217 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java
index c9df7d9..da64c98 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java
@@ -92,8 +92,7 @@ class VectorDeserializeOrcWriter extends EncodingWriter implements Runnable {
       return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
     }
     Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
-    PartitionDesc partDesc = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
-        parts, path, null);
+    PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
     if (partDesc == null) {
       LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: no partition desc for " + path);
       return new DeserializerOrcWriter(serDe, sourceOi, allocSize);

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
index 5c0c3ed..92a1ebc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
@@ -275,7 +275,7 @@ public class SplitGrouper {
       MapWork work) throws IOException {
     boolean retval = false;
     Path path = ((FileSplit) s).getPath();
-    PartitionDesc pd = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    PartitionDesc pd = HiveFileFormatUtils.getFromPathRecursively(
         work.getPathToPartitionInfo(), path, cache);
     String currentDeserializerClass = pd.getDeserializerClassName();
     Class<?> currentInputFormatClass = pd.getInputFileFormatClass();
@@ -288,7 +288,7 @@ public class SplitGrouper {
       return !path.equals(prevPath);
     }
     PartitionDesc prevPD =
-        HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(),
+        HiveFileFormatUtils.getFromPathRecursively(work.getPathToPartitionInfo(),
             prevPath, cache);
     previousDeserializerClass = prevPD.getDeserializerClassName();
     previousInputFormatClass = prevPD.getInputFileFormatClass();


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index e546a65..cfa4e2c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -150,7 +150,7 @@ public class VectorizedRowBatchCtx {
         .getMapWork(hiveConf).getPathToPartitionInfo();
     PartitionDesc partDesc = HiveFileFormatUtils
-        .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+        .getFromPathRecursively(pathToPartitionInfo,
             split.getPath(), IOPrepareCache.get().getPartitionDescMap());
     getPartitionValues(vrbCtx, partDesc, partitionValues);


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
index 0e6ec84..1d77da6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
@@ -75,7 +75,7 @@ public class HiveIndexedInputFormat extends HiveInputFormat {
     PartitionDesc part;
     for (Path dir : dirs) {
       part = HiveFileFormatUtils
-          .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
+          .getFromPathRecursively(pathToPartitionInfo, dir,
               IOPrepareCache.get().allocatePartitionDescMap(), true);
       // create a new InputFormat instance if this is the first time to see this
       // class

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
index 86397af..d0ddad4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
@@ -102,7 +102,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
     public Set<Integer> call() throws Exception {
       Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
       for (int i = 0; i < length; i++) {
-        PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+        PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
             pathToPartitionInfo, paths[i + start],
             IOPrepareCache.get().allocatePartitionDescMap());
         // Use HiveInputFormat if any of the paths is not splittable
@@ -170,7 +170,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
       Path[] ipaths = inputSplitShim.getPaths();
       if (ipaths.length > 0) {
         PartitionDesc part = HiveFileFormatUtils
-            .getPartitionDescFromPathRecursively(this.pathToPartitionInfo,
+            .getFromPathRecursively(this.pathToPartitionInfo,
                 ipaths[0], IOPrepareCache.get().getPartitionDescMap());
         inputFormatClassName = part.getInputFileFormatClass().getName();
       }
@@ -284,7 +284,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
       // extract all the inputFormatClass names for each chunk in the
       // CombinedSplit.
-      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(pathToPartitionInfo,
+      PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo,
           inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());
 
       // create a new InputFormat instance if this is the first time to see
@@ -364,7 +364,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
     Set<Path> poolSet = new HashSet<Path>();
 
     for (Path path : paths) {
-      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+      PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
          pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
      TableDesc tableDesc = part.getTableDesc();
      if ((tableDesc != null) && tableDesc.isNonNative()) {


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
index de36f2b..6911fb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
@@ -114,7 +114,7 @@ public class CombineHiveRecordReader<K extends WritableComparable, V extends Wri
     PartitionDesc part = null;
     Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
     for (Path path : hsplit.getPaths()) {
-      PartitionDesc otherPart = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+      PartitionDesc otherPart = HiveFileFormatUtils.getFromPathRecursively(
          pathToPartInfo, path, cache);
      LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
      if (part == null) {
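
The cacheMap argument threaded through these call sites memoizes the scheme-stripped copy
of the lookup map, so repeated lookups against the same map (as in the per-split loop of
CombineHiveRecordReader above) only pay the stripping cost once. A minimal sketch of that
pattern, assuming hive-exec and hadoop-common on the classpath; the class name, helper, and
paths are made up for illustration:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

public class PartDescLookupSketch {
  // Resolves a PartitionDesc for every split path, sharing one local cache so
  // the scheme-stripped copy of pathToPartInfo is built at most once.
  static void resolveAll(Map<Path, PartitionDesc> pathToPartInfo, Path... paths)
      throws IOException {
    Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
    for (Path path : paths) {
      // Falls back to the cached scheme-stripped map when the exact lookup misses.
      PartitionDesc part =
          HiveFileFormatUtils.getFromPathRecursively(pathToPartInfo, path, cache);
      System.out.println(path + " -> " + part);
    }
  }
}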

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
index 46f9970..4bc60dc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
@@ -326,7 +326,7 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader
             .getMapWork(jobConf).getPathToPartitionInfo();
       }
       part = HiveFileFormatUtils
-          .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+          .getFromPathRecursively(pathToPartitionInfo,
              filePath, IOPrepareCache.get().getPartitionDescMap());
    } catch (AssertionError ae) {
      LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath()


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
index 7727114..1103170 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
@@ -350,39 +350,33 @@ public final class HiveFileFormatUtils {
         .finalDestination(conf.getDestPath()));
   }
 
-  public static PartitionDesc getPartitionDescFromPathRecursively(
-      Map<Path, PartitionDesc> pathToPartitionInfo, Path dir,
-      Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cacheMap)
-      throws IOException {
-    return getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
-        cacheMap, false);
+  public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir,
+      Map<Map<Path, T>, Map<Path, T>> cacheMap) throws IOException {
+    return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, false);
   }
 
-  public static PartitionDesc getPartitionDescFromPathRecursively(
-      Map<Path, PartitionDesc> pathToPartitionInfo, Path dir,
-      Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cacheMap, boolean ignoreSchema)
-      throws IOException {
-
-    PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir);
+  public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir,
+      Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema) throws IOException {
+    T part = getFromPath(pathToPartitionInfo, dir);
     if (part == null
         && (ignoreSchema
             || (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim().equals(""))
             || FileUtils.pathsContainNoScheme(pathToPartitionInfo.keySet()))) {
-      Map<Path, PartitionDesc> newPathToPartitionInfo = null;
+      Map<Path, T> newPathToPartitionInfo = null;
       if (cacheMap != null) {
         newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
       }
       if (newPathToPartitionInfo == null) { // still null
-        newPathToPartitionInfo = populateNewPartitionDesc(pathToPartitionInfo);
+        newPathToPartitionInfo = populateNewT(pathToPartitionInfo);
         if (cacheMap != null) {
           cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
         }
       }
-      part = doGetPartitionDescFromPath(newPathToPartitionInfo, dir);
+      part = getFromPath(newPathToPartitionInfo, dir);
     }
     if (part != null) {
       return part;
@@ -392,18 +386,18 @@ public final class HiveFileFormatUtils {
     }
   }
 
-  private static Map<Path, PartitionDesc> populateNewPartitionDesc(Map<Path, PartitionDesc> pathToPartitionInfo) {
-    Map<Path, PartitionDesc> newPathToPartitionInfo = new HashMap<>();
-    for (Map.Entry<Path, PartitionDesc> entry: pathToPartitionInfo.entrySet()) {
-      PartitionDesc partDesc = entry.getValue();
+  private static <T> Map<Path, T> populateNewT(Map<Path, T> pathToPartitionInfo) {
+    Map<Path, T> newPathToPartitionInfo = new HashMap<>();
+    for (Map.Entry<Path, T> entry: pathToPartitionInfo.entrySet()) {
+      T partDesc = entry.getValue();
       Path pathOnly = Path.getPathWithoutSchemeAndAuthority(entry.getKey());
       newPathToPartitionInfo.put(pathOnly, partDesc);
     }
     return newPathToPartitionInfo;
  }
 
-  private static PartitionDesc doGetPartitionDescFromPath(
-      Map<Path, PartitionDesc> pathToPartitionInfo, Path dir) {
+  private static <T> T getFromPath(
+      Map<Path, T> pathToPartitionInfo, Path dir) {
     // We first do exact match, and then do prefix matching. The latter is due to input dir
     // could be /dir/ds='2001-02-21'/part-03 where part-03 is not part of partition
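
The change above is the enabling refactor: getPartitionDescFromPathRecursively becomes the
generic getFromPathRecursively, so the exact-match-then-scheme-stripped lookup works for any
map value type, not just PartitionDesc. A minimal usage sketch mirroring the first group in
TestHiveFileFormatUtils, with a String payload standing in for a PartitionDesc (assumes
hive-exec and hadoop-common on the classpath; the label value is made up):

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;

public class GenericLookupSketch {
  public static void main(String[] args) throws IOException {
    // The map is keyed by a path that carries a scheme, as MapWork path maps often are.
    Map<Path, String> pathToLabel = new HashMap<>();
    pathToLabel.put(new Path("file:///tbl/par1/part2/part3"), "part3-label");

    // The exact lookup misses (the query path has no scheme), so the method
    // retries against a scheme-stripped copy of the map and finds the entry.
    String label = HiveFileFormatUtils.getFromPathRecursively(
        pathToLabel, new Path("/tbl/par1/part2/part3"), null);
    System.out.println(label); // part3-label
  }
}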

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 921aa53..1d3485e 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -348,7 +348,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
     }
 
     boolean nonNative = false;
-    PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, hsplit.getPath(), null);
     LOG.debug("Found spec for " + hsplit.getPath() + " " + part + " from " + pathToPartitionInfo);


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
index 68407f5..e608932 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
@@ -83,17 +84,19 @@ public class ProjectionPusher {
     }
 
     final Set<String> aliases = new HashSet<String>();
-    final Iterator<Entry<Path, ArrayList<String>>> iterator =
-        mapWork.getPathToAliases().entrySet().iterator();
-
-    while (iterator.hasNext()) {
-      final Entry<Path, ArrayList<String>> entry = iterator.next();
-      final String key = entry.getKey().toUri().getPath();
-      if (splitPath.equals(key) || splitPathWithNoSchema.equals(key)) {
-        aliases.addAll(entry.getValue());
+    try {
+      ArrayList<String> a = HiveFileFormatUtils.getFromPathRecursively(
+          mapWork.getPathToAliases(), new Path(splitPath), null);
+      if (a != null) {
+        aliases.addAll(a);
       }
+      if (a == null || a.isEmpty()) {
+        // TODO: not having aliases for path usually means some bug. Should it give up?
+        LOG.warn("Couldn't find aliases for " + splitPath);
+      }
+    } catch (IllegalArgumentException | IOException e) {
+      throw new RuntimeException(e);
     }
-
     // Collect the needed columns from all the aliases and create ORed filter
     // expression for the table.
     boolean allColumnsNeeded = false;
@@ -181,7 +184,8 @@ public class ProjectionPusher {
       throws IOException {
     updateMrWork(jobConf);  // TODO: refactor this in HIVE-6366
     final JobConf cloneJobConf = new JobConf(jobConf);
-    final PartitionDesc part = pathToPartitionInfo.get(path);
+    final PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
+        pathToPartitionInfo, path, null);
     if ((part != null) && (part.getTableDesc() != null)) {
       Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index fadbc20..2a82206 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -908,6 +908,7 @@ public class Vectorizer implements PhysicalPlanResolver {
       setOperatorIssue("Alias " + alias + " not present in aliases " + aliases);
       return new ImmutablePair<Boolean,Boolean>(false, false);
     }
+    // TODO: should this use getPartitionDescFromPathRecursively?
     PartitionDesc partDesc = pathToPartitionInfo.get(path);
     if (partDesc.getVectorPartitionDesc() != null) {
       // We've seen this already.


http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
index dae85a6..72910d0 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
@@ -46,19 +46,19 @@ public class TestHiveFileFormatUtils extends TestCase {
 
     // first group
     PartitionDesc ret = null;
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("file:///tbl/par1/part2/part3"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("file:///tbl/par1/part2/part3 not found.", partDesc_3, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("/tbl/par1/part2/part3"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("/tbl/par1/part2/part3 not found.", partDesc_3, ret);
 
     boolean exception = false;
     try {
-      ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+      ret = HiveFileFormatUtils.getFromPathRecursively(
          pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part3"),
          IOPrepareCache.get().allocatePartitionDescMap());
    } catch (IOException e) {
@@ -69,17 +69,17 @@
     exception = false;
 
     // second group
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("file:///tbl/par1/part2/part4"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("file:///tbl/par1/part2/part4 not found.", partDesc_4, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("/tbl/par1/part2/part4"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("/tbl/par1/part2/part4 not found.", partDesc_4, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part4"),
         IOPrepareCache.get().allocatePartitionDescMap());
@@ -87,24 +87,24 @@
     assertEquals("hdfs:///tbl/par1/part2/part4 not found.", partDesc_4,
         ret);
 
     // third group
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("file:///tbl/par1/part2/part5"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("file:///tbl/par1/part2/part5 not found.", partDesc_5, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("/tbl/par1/part2/part5"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("/tbl/par1/part2/part5 not found.", partDesc_5, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
         pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part5"),
         IOPrepareCache.get().allocatePartitionDescMap());
     assertEquals("hdfs:///tbl/par1/part2/part5 not found", partDesc_5, ret);
 
     // fourth group
     try {
-      ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+      ret = HiveFileFormatUtils.getFromPathRecursively(
          pathToPartitionInfo, new Path("file:///tbl/par1/part2/part6"),
          IOPrepareCache.get().allocatePartitionDescMap());
    } catch (IOException e) {
@@ -114,12 +114,12 @@
         exception);
     exception = false;
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartitionInfo, new Path("/tbl/par1/part2/part6"),
        IOPrepareCache.get().allocatePartitionDescMap());
    assertEquals("/tbl/par1/part2/part6 not found.", partDesc_6, ret);
 
-    ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+    ret = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part6"),
        IOPrepareCache.get().allocatePartitionDescMap());
    assertEquals("hdfs:///tbl/par1/part2/part6 not found.", partDesc_6, ret);

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/test/queries/clientpositive/mm_all.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mm_all.q b/ql/src/test/queries/clientpositive/mm_all.q
index db16920..a79d917 100644
--- a/ql/src/test/queries/clientpositive/mm_all.q
+++ b/ql/src/test/queries/clientpositive/mm_all.q
@@ -448,7 +448,21 @@ drop table skewjoin_mm;
 
 set hive.optimize.skewjoin=false;
 
-
+set hive.optimize.index.filter=true;
+set hive.auto.convert.join=false;
+CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only");
+INSERT INTO parquet1_mm VALUES(1), (2);
+CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only");
+INSERT INTO parquet2_mm VALUES(1, 'value1');
+INSERT INTO parquet2_mm VALUES(1, 'value2');
+select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2';
+drop table parquet1_mm;
+drop table parquet2_mm;
+
+set hive.auto.convert.join=true;
 
 drop table intermediate;

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/test/results/clientpositive/llap/mm_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out
index 4bb480a..062d60f 100644
--- a/ql/src/test/results/clientpositive/llap/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out
@@ -3159,6 +3159,78 @@ POSTHOOK: query: drop table skewjoin_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@skewjoin_mm
 POSTHOOK: Output: default@skewjoin_mm
+PREHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet1_mm
+PREHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet1_mm
+POSTHOOK: Lineage: parquet1_mm.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet2_mm
+PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet2_mm
+POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet2_mm
+POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet1_mm
+PREHOOK: Input: default@parquet2_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet1_mm
+POSTHOOK: Input: default@parquet2_mm
+#### A masked pattern was here ####
+1	value1	value2
+PREHOOK: query: drop table parquet1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet1_mm
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: drop table parquet1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet1_mm
+POSTHOOK: Output: default@parquet1_mm
+PREHOOK: query: drop table parquet2_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet2_mm
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: drop table parquet2_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet2_mm
+POSTHOOK: Output: default@parquet2_mm
 PREHOOK: query: drop table intermediate
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@intermediate

http://git-wip-us.apache.org/repos/asf/hive/blob/e76e8d0a/ql/src/test/results/clientpositive/mm_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mm_all.q.out b/ql/src/test/results/clientpositive/mm_all.q.out
index 26854bd..b418ea1 100644
--- a/ql/src/test/results/clientpositive/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/mm_all.q.out
@@ -1165,10 +1165,10 @@ POSTHOOK: Input: default@merge1_mm@key=103
 POSTHOOK: Input: default@merge1_mm@key=97
 POSTHOOK: Input: default@merge1_mm@key=98
 #### A masked pattern was here ####
-103	103
-100	100
 98	98
+103	103
 97	97
+100	100
 0	0
 10	10
 PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate
@@ -1217,16 +1217,16 @@ POSTHOOK: Input: default@merge1_mm@key=97
 POSTHOOK: Input: default@merge1_mm@key=98
 #### A masked pattern was here ####
 100	100
+100	100
 103	103
 97	97
-100	100
 103	103
 97	97
 98	98
 98	98
 0	0
-0	0
 10	10
+0	0
 10	10
 PREHOOK: query: drop table merge1_mm
 PREHOOK: type: DROPTABLE
@@ -3184,6 +3184,78 @@ POSTHOOK: query: drop table skewjoin_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@skewjoin_mm
 POSTHOOK: Output: default@skewjoin_mm
+PREHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet1_mm
+PREHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet1_mm
+POSTHOOK: Lineage: parquet1_mm.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet2_mm
+PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet2_mm
+POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet2_mm
+POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet1_mm
+PREHOOK: Input: default@parquet2_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm
+  JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id
+  JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id
+where t1.value = 'value1' and t2.value = 'value2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet1_mm
+POSTHOOK: Input: default@parquet2_mm
+#### A masked pattern was here ####
+1	value1	value2
+PREHOOK: query: drop table parquet1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet1_mm
+PREHOOK: Output: default@parquet1_mm
+POSTHOOK: query: drop table parquet1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet1_mm
+POSTHOOK: Output: default@parquet1_mm
+PREHOOK: query: drop table parquet2_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parquet2_mm
+PREHOOK: Output: default@parquet2_mm
+POSTHOOK: query: drop table parquet2_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parquet2_mm
+POSTHOOK: Output: default@parquet2_mm
 PREHOOK: query: drop table intermediate
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@intermediate