[2/2] hive git commit: HIVE-10636 : CASE comparison operator rotation optimization (Ashutosh Chauhan via Gopal V)
HIVE-10636 : CASE comparison operator rotation optimization (Ashutosh Chauhan via Gopal V) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b1ad20f9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b1ad20f9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b1ad20f9 Branch: refs/heads/branch-1.2 Commit: b1ad20f9404f01dfc6ea95ad0af614901d4cb4b3 Parents: 5984bec Author: Ashutosh Chauhan Authored: Tue May 12 14:58:06 2015 -0700 Committer: Ashutosh Chauhan Committed: Fri May 15 18:31:38 2015 -0700 -- .../hive/ql/exec/ExprNodeEvaluatorFactory.java | 7 - .../ql/exec/vector/VectorizationContext.java| 13 +- .../optimizer/ConstantPropagateProcFactory.java | 86 +-- .../hive/ql/optimizer/GroupByOptimizer.java | 7 +- .../PrunerExpressionOperatorFactory.java| 3 - .../hive/ql/optimizer/SimpleFetchOptimizer.java | 2 - .../calcite/translator/RexNodeConverter.java| 5 +- .../ql/optimizer/lineage/ExprProcFactory.java | 3 +- .../ql/optimizer/pcr/PcrExprProcFactory.java| 3 +- .../hive/ql/parse/TableAccessAnalyzer.java | 6 +- .../hive/ql/parse/TypeCheckProcFactory.java | 4 +- .../hive/ql/plan/ExprNodeConstantDesc.java | 14 +- .../hadoop/hive/ql/plan/ExprNodeNullDesc.java | 69 -- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 12 +- .../hive/ql/udf/generic/GenericUDFCoalesce.java | 2 +- .../hive/ql/udf/generic/GenericUDFGreatest.java | 1 + .../hive/ql/udf/generic/GenericUDFInstr.java| 2 +- .../hive/ql/udf/generic/GenericUDFLocate.java | 2 +- .../hive/ql/udf/generic/GenericUDFPrintf.java | 3 +- .../ql/udf/generic/GenericUDFTranslate.java | 8 +- .../hive/ql/udf/generic/GenericUDFUtils.java| 6 +- .../clientpositive/fold_eq_with_case_when.q | 21 ++ .../clientpositive/annotate_stats_filter.q.out | 4 +- .../clientpositive/fold_eq_with_case_when.q.out | 231 +++ .../test/results/clientpositive/fold_when.q.out | 2 +- ql/src/test/results/clientpositive/input6.q.out | 2 +- .../results/clientpositive/join_nullsafe.q.out | 10 +- .../clientpositive/spark/join_nullsafe.q.out| 10 +- .../subquery_notin_having.q.java1.7.out | 2 +- .../clientpositive/tez/join_nullsafe.q.out | 14 +- .../clientpositive/tez/vector_coalesce.q.out| 26 +-- .../clientpositive/vector_coalesce.q.out| 6 +- .../hive/serde2/io/HiveVarcharWritable.java | 7 +- .../ObjectInspectorConverters.java | 5 +- .../objectinspector/ObjectInspectorUtils.java | 4 +- .../AbstractPrimitiveObjectInspector.java | 1 + .../primitive/WritableVoidObjectInspector.java | 5 + 37 files changed, 396 insertions(+), 212 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/b1ad20f9/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java index a149571..f08321c 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java @@ -27,7 +27,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.NullWritable; @@ -57,12 +56,6 @@ public final class ExprNodeEvaluatorFactory { 
if (desc instanceof ExprNodeFieldDesc) { return new ExprNodeFieldEvaluator((ExprNodeFieldDesc) desc); } -// Null node, a constant node with value NULL and no type information -if (desc instanceof ExprNodeNullDesc) { - return new ExprNodeConstantEvaluator(new ExprNodeConstantDesc(TypeInfoFactory - .getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class), null)); -} - throw new RuntimeException( "Cannot find ExprNodeEvaluator for the exprNodeDesc = " + desc); } http://git-wip-us.apache.org/repos/asf/hive/blob/b1ad20f9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index df39218..48f34a9 100644 --- a/ql/src/java/org/apache/hadoop/hive
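The rotation is easiest to see on a tiny example. Below is a toy model in plain Java of the rewrite this patch teaches ConstantPropagateProcFactory; the method and names are illustrative, not Hive's API. An equality comparison against a CASE is pushed into the branches, each branch folds to a boolean constant, and the predicate collapses to the WHEN condition (or its negation). This is the shape the new fold_eq_with_case_when.q test exercises.

    import java.util.Objects;

    public class CaseRotationSketch {

      // Models (CASE WHEN cond THEN thenVal ELSE elseVal END) = target for constant
      // branch values; null handling and non-constant branches are ignored here,
      // the real rule only fires when the fold is safe.
      static String rotateEquality(String cond, String thenVal, String elseVal, String target) {
        boolean thenMatches = Objects.equals(thenVal, target); // fold the THEN branch
        boolean elseMatches = Objects.equals(elseVal, target); // fold the ELSE branch
        if (thenMatches && !elseMatches) {
          return cond;                   // predicate is true exactly when cond is
        }
        if (!thenMatches && elseMatches) {
          return "NOT (" + cond + ")";   // predicate is true exactly when cond is not
        }
        return thenMatches ? "true" : "false"; // both or neither branch can match
      }

      public static void main(String[] args) {
        // WHERE (CASE WHEN key = 0 THEN 'a' ELSE 'b' END) = 'a'  ==>  WHERE key = 0
        System.out.println(rotateEquality("key = 0", "a", "b", "a"));
        // WHERE (CASE WHEN key = 0 THEN 'a' ELSE 'b' END) = 'b'  ==>  WHERE NOT (key = 0)
        System.out.println(rotateEquality("key = 0", "a", "b", "b"));
      }
    }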
[1/2] hive git commit: HIVE-10325 : Remove ExprNodeNullEvaluator (Ashutosh Chauhan via Gopal V)
Repository: hive Updated Branches: refs/heads/branch-1.2 d421201cc -> b1ad20f94 HIVE-10325 : Remove ExprNodeNullEvaluator (Ashutosh Chauhan via Gopal V) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5984bec5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5984bec5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5984bec5 Branch: refs/heads/branch-1.2 Commit: 5984bec5c844f23d98b5e0da42d4784a83eb93ac Parents: d421201 Author: Ashutosh Chauhan Authored: Tue Apr 14 13:46:50 2015 -0700 Committer: Ashutosh Chauhan Committed: Fri May 15 18:31:10 2015 -0700 -- .../hive/ql/exec/ExprNodeEvaluatorFactory.java | 11 +++-- .../ql/exec/ExprNodeGenericFuncEvaluator.java | 3 +- .../hive/ql/exec/ExprNodeNullEvaluator.java | 47 .../hive/ql/plan/ExprNodeConstantDesc.java | 1 - 4 files changed, 9 insertions(+), 53 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5984bec5/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java index ff0ddc8..a149571 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java @@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.NullWritable; /** * ExprNodeEvaluatorFactory. 
@@ -57,7 +59,8 @@ public final class ExprNodeEvaluatorFactory { } // Null node, a constant node with value NULL and no type information if (desc instanceof ExprNodeNullDesc) { - return new ExprNodeNullEvaluator((ExprNodeNullDesc) desc); + return new ExprNodeConstantEvaluator(new ExprNodeConstantDesc(TypeInfoFactory + .getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class), null)); } throw new RuntimeException( @@ -114,14 +117,14 @@ public final class ExprNodeEvaluatorFactory { private static class EvaluatorContext { -private final Map<ExprNodeDesc.ExprNodeDescEqualityWrapper, ExprNodeEvaluator> cached = +private final Map<ExprNodeDesc.ExprNodeDescEqualityWrapper, ExprNodeEvaluator> cached = new HashMap<ExprNodeDesc.ExprNodeDescEqualityWrapper, ExprNodeEvaluator>(); private boolean hasReference; public ExprNodeEvaluator getEvaluated(ExprNodeEvaluator eval) { - ExprNodeDesc.ExprNodeDescEqualityWrapper key = - new ExprNodeDesc.ExprNodeDescEqualityWrapper(eval.expr); + ExprNodeDesc.ExprNodeDescEqualityWrapper key = + new ExprNodeDesc.ExprNodeDescEqualityWrapper(eval.expr); ExprNodeEvaluator prev = cached.get(key); if (prev == null) { cached.put(key, eval); http://git-wip-us.apache.org/repos/asf/hive/blob/5984bec5/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java index b695bef..b09b706 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java @@ -78,9 +78,10 @@ public class ExprNodeGenericFuncEvaluator extends ExprNodeEvaluator<ExprNodeGenericFuncDesc> { http://git-wip-us.apache.org/repos/asf/hive/blob/5984bec5/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java deleted file mode 100644 index 3aaf17c..000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on a
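With ExprNodeNullEvaluator deleted, a NULL literal flows through the ordinary constant path. A minimal sketch of what the factory now builds, using the same classes the hunk above wires together (assumes Hive 1.2's ql and serde2 jars on the classpath):

    import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
    import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
    import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    import org.apache.hadoop.io.NullWritable;

    public class NullConstantEvaluatorSketch {
      public static void main(String[] args) throws Exception {
        // A NULL literal is now just a constant desc with the void type info and a
        // null value, exactly what the factory used to synthesize for ExprNodeNullDesc.
        ExprNodeConstantDesc nullDesc = new ExprNodeConstantDesc(
            TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class), null);
        ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(nullDesc);
        System.out.println(nullDesc.getTypeString() + " -> " + eval.getClass().getSimpleName());
      }
    }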
[1/2] hive git commit: Revert "HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)"
Repository: hive Updated Branches: refs/heads/llap 41178e39d -> 0db5368c0 Revert "HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)" This reverts commit 41178e39ddc5d43ff4abe0b147fd902617944aa3. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6701aa04 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6701aa04 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6701aa04 Branch: refs/heads/llap Commit: 6701aa04ef1d194369f702f88eb4b47123f42958 Parents: 41178e3 Author: Prasanth Jayachandran Authored: Fri May 15 15:27:20 2015 -0700 Committer: Prasanth Jayachandran Committed: Fri May 15 15:27:20 2015 -0700 -- .../hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/6701aa04/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java index 36f2246..b16a5c4 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.conf.Configuration; @@ -211,7 +212,7 @@ public class TaskRunnerCallable extends CallableWithNdc { // TODO Fix UGI and FS Handling. Closing UGI here causes some errors right now. //FileSystem.closeAllForUGI(taskUgi); LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" + - runtimeWatch.stop().elapsedMillis(); + runtimeWatch.stop().elapsed(TimeUnit.MILLISECONDS)); if (LOG.isDebugEnabled()) { LOG.debug("canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish()); } @@ -376,10 +377,10 @@ public class TaskRunnerCallable extends CallableWithNdc { LOG.info("Killed task {}", requestId); if (killtimerWatch.isRunning()) { killtimerWatch.stop(); -long elapsed = killtimerWatch.elapsedMillis(); +long elapsed = killtimerWatch.elapsed(TimeUnit.MILLISECONDS); LOG.info("Time to die for task {}", elapsed); } - metrics.incrPreemptionTimeLost(runtimeWatch.elapsedMillis(); + metrics.incrPreemptionTimeLost(runtimeWatch.elapsed(TimeUnit.MILLISECONDS)); metrics.incrExecutorTotalKilled(); break; case COMMUNICATION_FAILURE:
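The underlying conflict: Hive code written against a newer Guava calls Stopwatch.elapsed(TimeUnit), while Hadoop 2.x ships Guava 11.0.2, whose Stopwatch only offers elapsedMillis()/elapsedTime(TimeUnit), so whichever API the bytecode references fails with NoSuchMethodError under the other version. (The version details are recalled here, not stated in the commit.) A minimal sketch, compiling against Guava 15 or later:

    import java.util.concurrent.TimeUnit;
    import com.google.common.base.Stopwatch;

    public class StopwatchApiSketch {
      public static void main(String[] args) throws InterruptedException {
        // Guava 15+; on Guava 11 the equivalent is: Stopwatch w = new Stopwatch().start();
        Stopwatch runtimeWatch = Stopwatch.createStarted();
        Thread.sleep(50);
        runtimeWatch.stop();
        // Newer Guava API (what this revert restores in TaskRunnerCallable):
        long ms = runtimeWatch.elapsed(TimeUnit.MILLISECONDS);
        // Old Guava 11 API (what HIVE-10730 switches to, matching Hadoop's Guava):
        // long ms = runtimeWatch.elapsedMillis();
        System.out.println("ExecutionTime=" + ms);
      }
    }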
svn commit: r9016 - in /release/hive: ./ hive-1.2.0/
Author: vikram Date: Fri May 15 22:36:58 2015 New Revision: 9016 Log: Add hive-1.2.0 to svn dist Added: release/hive/hive-1.2.0/ release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz (with props) release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.asc (with props) release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.md5 release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz (with props) release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.asc (with props) release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.md5 Modified: release/hive/stable Added: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz == Binary file - no diff available. Propchange: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz -- svn:mime-type = application/x-gzip Added: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.asc == Binary file - no diff available. Propchange: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.asc -- svn:mime-type = application/pgp-signature Added: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.md5 == --- release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.md5 (added) +++ release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.md5 Fri May 15 22:36:58 2015 @@ -0,0 +1 @@ +17871eea4d087695ac5d0d03386e4ec2 apache-hive-1.2.0-bin.tar.gz Added: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz == Binary file - no diff available. Propchange: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz -- svn:mime-type = application/x-gzip Added: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.asc == Binary file - no diff available. Propchange: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.asc -- svn:mime-type = application/pgp-signature Added: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.md5 == --- release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.md5 (added) +++ release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.md5 Fri May 15 22:36:58 2015 @@ -0,0 +1 @@ +55f3bf70ec40041ecad8dcdc0435d6b6 apache-hive-1.2.0-src.tar.gz Modified: release/hive/stable == --- release/hive/stable (original) +++ release/hive/stable Fri May 15 22:36:58 2015 @@ -1 +1 @@ -link hive-1.1.0 \ No newline at end of file +link hive-1.2.0 \ No newline at end of file
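For anyone verifying a downloaded artifact against the .md5 sidecars added above, a JDK-only sketch (the local file path is an assumption; the expected digest is the one committed here):

    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.security.MessageDigest;

    public class Md5Check {
      public static void main(String[] args) throws Exception {
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        // Assumes the tarball sits in the working directory.
        try (InputStream in = Files.newInputStream(Paths.get("apache-hive-1.2.0-bin.tar.gz"))) {
          byte[] buf = new byte[8192];
          for (int n; (n = in.read(buf)) != -1; ) {
            md5.update(buf, 0, n);
          }
        }
        StringBuilder hex = new StringBuilder();
        for (byte b : md5.digest()) {
          hex.append(String.format("%02x", b));
        }
        // Should print 17871eea4d087695ac5d0d03386e4ec2, the value in the .md5 file above.
        System.out.println(hex);
      }
    }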
[2/2] hive git commit: HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)
HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0db5368c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0db5368c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0db5368c Branch: refs/heads/llap Commit: 0db5368c0cc0f4329916a72a11c9d99a02971bac Parents: 6701aa0 Author: Prasanth Jayachandran Authored: Fri May 15 15:36:08 2015 -0700 Committer: Prasanth Jayachandran Committed: Fri May 15 15:36:08 2015 -0700 -- .../hive/llap/daemon/impl/TaskRunnerCallable.java | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0db5368c/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java index b16a5c4..72fcf3b 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java @@ -26,7 +26,6 @@ import java.util.Map; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.conf.Configuration; @@ -59,6 +58,9 @@ import org.apache.tez.runtime.internals.api.TaskReporterInterface; import org.apache.tez.runtime.library.input.UnorderedKVInput; import org.apache.tez.runtime.task.EndReason; import org.apache.tez.runtime.task.TaskRunner2Result; +import org.apache.tez.runtime.task.TezTaskRunner2; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Stopwatch; import com.google.common.collect.HashMultimap; @@ -67,9 +69,6 @@ import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.tez.runtime.task.TezTaskRunner2; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * @@ -212,7 +211,7 @@ public class TaskRunnerCallable extends CallableWithNdc { // TODO Fix UGI and FS Handling. Closing UGI here causes some errors right now. //FileSystem.closeAllForUGI(taskUgi); LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" + - runtimeWatch.stop().elapsed(TimeUnit.MILLISECONDS)); + runtimeWatch.stop().elapsedMillis()); if (LOG.isDebugEnabled()) { LOG.debug("canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish()); } @@ -377,10 +376,10 @@ public class TaskRunnerCallable extends CallableWithNdc { LOG.info("Killed task {}", requestId); if (killtimerWatch.isRunning()) { killtimerWatch.stop(); -long elapsed = killtimerWatch.elapsed(TimeUnit.MILLISECONDS); +long elapsed = killtimerWatch.elapsedMillis(); LOG.info("Time to die for task {}", elapsed); } - metrics.incrPreemptionTimeLost(runtimeWatch.elapsed(TimeUnit.MILLISECONDS)); + metrics.incrPreemptionTimeLost(runtimeWatch.elapsedMillis()); metrics.incrExecutorTotalKilled(); break; case COMMUNICATION_FAILURE:
hive git commit: HIVE-10670: Duplicate declaration of curator-recipes at pom.xml (Hari via Xuefu)
Repository: hive Updated Branches: refs/heads/master 3056e7aa8 -> f106730ba HIVE-10670: Duplicate declaration of curator-recipes at pom.xml (Hari via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f106730b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f106730b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f106730b Branch: refs/heads/master Commit: f106730ba8d6d65265ad25690e4e8056a7053155 Parents: 3056e7a Author: Xuefu Zhang Authored: Fri May 15 15:13:15 2015 -0700 Committer: Xuefu Zhang Committed: Fri May 15 15:13:15 2015 -0700 -- pom.xml | 5 - 1 file changed, 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f106730b/pom.xml -- diff --git a/pom.xml b/pom.xml index 2e4ca36..08d6dc3 100644 --- a/pom.xml +++ b/pom.xml @@ -502,11 +502,6 @@ <version>${curator.version}</version> </dependency> <dependency> -<groupId>org.apache.curator</groupId> -<artifactId>curator-recipes</artifactId> -<version>${curator.version}</version> -</dependency> -<dependency> <groupId>org.codehaus.groovy</groupId> <artifactId>groovy-all</artifactId> <version>${groovy.version}</version>
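Duplicates like this are easy to reintroduce in a pom this size. A throwaway JDK-only sketch that flags repeated groupId:artifactId pairs; it naively scans every dependency element, including dependencyManagement, which is enough to surface the duplicate removed here:

    import java.io.File;
    import java.util.HashSet;
    import java.util.Set;
    import javax.xml.parsers.DocumentBuilderFactory;
    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    import org.w3c.dom.NodeList;

    public class DuplicateDependencyCheck {
      public static void main(String[] args) throws Exception {
        // Assumes a pom.xml in the working directory.
        Document doc = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder().parse(new File("pom.xml"));
        NodeList deps = doc.getElementsByTagName("dependency");
        Set<String> seen = new HashSet<>();
        for (int i = 0; i < deps.getLength(); i++) {
          Element dep = (Element) deps.item(i);
          String key = text(dep, "groupId") + ":" + text(dep, "artifactId");
          if (!seen.add(key)) {
            System.out.println("duplicate declaration: " + key);
          }
        }
      }

      private static String text(Element dep, String tag) {
        NodeList n = dep.getElementsByTagName(tag);
        return n.getLength() > 0 ? n.item(0).getTextContent().trim() : "?";
      }
    }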
hive git commit: HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)
Repository: hive Updated Branches: refs/heads/llap d567cc445 -> 41178e39d HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/41178e39 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/41178e39 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/41178e39 Branch: refs/heads/llap Commit: 41178e39ddc5d43ff4abe0b147fd902617944aa3 Parents: d567cc4 Author: Siddharth Seth Authored: Fri May 15 14:59:07 2015 -0700 Committer: Siddharth Seth Committed: Fri May 15 14:59:07 2015 -0700 -- .../hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/41178e39/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java index b16a5c4..36f2246 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java @@ -26,7 +26,6 @@ import java.util.Map; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.conf.Configuration; @@ -212,7 +211,7 @@ public class TaskRunnerCallable extends CallableWithNdc { // TODO Fix UGI and FS Handling. Closing UGI here causes some errors right now. //FileSystem.closeAllForUGI(taskUgi); LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" + - runtimeWatch.stop().elapsed(TimeUnit.MILLISECONDS)); + runtimeWatch.stop().elapsedMillis(); if (LOG.isDebugEnabled()) { LOG.debug("canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish()); } @@ -377,10 +376,10 @@ public class TaskRunnerCallable extends CallableWithNdc { LOG.info("Killed task {}", requestId); if (killtimerWatch.isRunning()) { killtimerWatch.stop(); -long elapsed = killtimerWatch.elapsed(TimeUnit.MILLISECONDS); +long elapsed = killtimerWatch.elapsedMillis(); LOG.info("Time to die for task {}", elapsed); } - metrics.incrPreemptionTimeLost(runtimeWatch.elapsed(TimeUnit.MILLISECONDS)); + metrics.incrPreemptionTimeLost(runtimeWatch.elapsedMillis(); metrics.incrExecutorTotalKilled(); break; case COMMUNICATION_FAILURE:
[8/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d421201c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d421201c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d421201c Branch: refs/heads/branch-1.2 Commit: d421201cc9b495e6a7f266d42ff7cfb93483b957 Parents: 878a282 Author: Gunther Hagleitner Authored: Thu May 14 15:42:04 2015 -0700 Committer: Gunther Hagleitner Committed: Fri May 15 14:35:35 2015 -0700 -- .../test/resources/testconfiguration.properties | 10 + .../ql/exec/vector/VectorizedBatchUtil.java |5 +- .../mapjoin/VectorMapJoinCommonOperator.java|8 +- .../VectorMapJoinGenerateResultOperator.java| 47 +- ...pJoinInnerBigOnlyGenerateResultOperator.java | 53 +- .../VectorMapJoinInnerBigOnlyLongOperator.java | 15 +- ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java | 15 +- ...VectorMapJoinInnerBigOnlyStringOperator.java | 12 +- ...ectorMapJoinInnerGenerateResultOperator.java | 39 +- .../mapjoin/VectorMapJoinInnerLongOperator.java | 17 +- .../VectorMapJoinInnerMultiKeyOperator.java | 19 +- .../VectorMapJoinInnerStringOperator.java | 17 +- ...orMapJoinLeftSemiGenerateResultOperator.java | 40 +- .../VectorMapJoinLeftSemiLongOperator.java | 13 +- .../VectorMapJoinLeftSemiMultiKeyOperator.java | 17 +- .../VectorMapJoinLeftSemiStringOperator.java| 17 +- ...ectorMapJoinOuterGenerateResultOperator.java | 805 --- .../mapjoin/VectorMapJoinOuterLongOperator.java | 189 +- .../VectorMapJoinOuterMultiKeyOperator.java | 184 +- .../VectorMapJoinOuterStringOperator.java | 185 +- .../mapjoin/VectorMapJoinRowBytesContainer.java |2 +- .../fast/VectorMapJoinFastBytesHashMap.java |8 +- .../VectorMapJoinFastBytesHashMultiSet.java |4 +- .../fast/VectorMapJoinFastBytesHashTable.java | 10 +- .../mapjoin/fast/VectorMapJoinFastKeyStore.java | 10 +- .../fast/VectorMapJoinFastLongHashMap.java |2 +- .../fast/VectorMapJoinFastLongHashTable.java| 18 +- .../fast/VectorMapJoinFastTableContainer.java |2 +- .../fast/VectorMapJoinFastValueStore.java |8 +- .../VectorMapJoinOptimizedLongCommon.java |4 +- .../hive/ql/optimizer/physical/Vectorizer.java | 24 +- .../test/queries/clientpositive/vector_join30.q | 160 ++ .../clientpositive/vector_join_filters.q| 38 + .../queries/clientpositive/vector_join_nulls.q | 33 + .../clientpositive/vector_left_outer_join2.q|2 + .../queries/clientpositive/vector_outer_join5.q | 173 ++ .../tez/acid_vectorization_partition.q.out | 20 +- .../clientpositive/tez/vector_join30.q.out | 1367 +++ .../tez/vector_join_filters.q.out | 222 ++ .../clientpositive/tez/vector_join_nulls.q.out | 195 ++ .../tez/vector_left_outer_join2.q.out | 20 +- .../tez/vector_left_outer_join3.q.out | 222 ++ .../clientpositive/tez/vector_outer_join5.q.out | 1328 +++ .../tez/vectorized_timestamp_ints_casts.q.out | 234 ++ .../results/clientpositive/vector_join30.q.out | 2194 ++ .../clientpositive/vector_join_filters.q.out| 222 ++ .../clientpositive/vector_join_nulls.q.out | 195 ++ .../vector_left_outer_join2.q.out |8 +- .../clientpositive/vector_outer_join5.q.out | 1300 +++ 49 files changed, 8936 insertions(+), 796 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index eeb46cc..0f2a831 100644 
--- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -220,8 +220,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_groupby_3.q,\ vector_groupby_reduce.q,\ vector_if_expr.q,\ + vector_inner_join.q,\ vector_interval_1.q,\ vector_interval_2.q,\ + vector_join30.q,\ + vector_join_filters.q,\ + vector_join_nulls.q,\ vector_left_outer_join.q,\ vector_left_outer_join2.q,\ vector_leftsemi_mapjoin.q,\ @@ -230,6 +234,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_multi_insert.q,\ vector_non_string_partition.q,\ vector_orderby_5.q,\ + vector_outer_join0.q,\ + vector_outer_join1.q,\ + vector_outer_join2.q,\ + vector_outer_join3.q,\ + vector_outer_join4.q,\ + vector_outer_join5.q,\ vector_partition_diff_num_cols.q,\ vector_partiti
[2/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/results/clientpositive/vector_join_filters.q.out -- diff --git a/ql/src/test/results/clientpositive/vector_join_filters.q.out b/ql/src/test/results/clientpositive/vector_join_filters.q.out new file mode 100644 index 000..48fc072 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_join_filters.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE myinput1_txt(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE myinput1_txt(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@myinput1_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@myinput1_txt +PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@myinput1_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1 +POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@myinput1_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1 +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +3078400 +Warning: Map Join MAPJOIN[17][bigTable=a] in task 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +4937935 +Warning: Map Join MAPJOIN[17][bigTable=b] in task 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +3080335 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUE
[3/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/results/clientpositive/vector_join30.q.out -- diff --git a/ql/src/test/results/clientpositive/vector_join30.q.out b/ql/src/test/results/clientpositive/vector_join30.q.out new file mode 100644 index 000..57f9aeb --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_join30.q.out @@ -0,0 +1,2194 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@orcsrc +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcsrc +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 + Stage-8 has a backup stage: Stage-2 + Stage-5 depends on stages: Stage-8 + Stage-3 depends on stages: Stage-2, Stage-5, Stage-6 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 +Map Reduce + Map Operator Tree: + TableScan +alias: orcsrc +Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE +Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: +Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 +Conditional Operator + + Stage: Stage-8 +Map Reduce Local Work + Alias -> Map Local Tables: +$INTNAME1 + Fetch Operator +limit: -1 + Alias -> Map Local Operator Tree: +$INTNAME1 + TableScan +HashTable Sink Operator + keys: +0 _col0 (type: string) +1 _col0 (type: string) + + Stage: Stage-5 +Map Reduce + Map Operator Tree: + TableScan +Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator +aggregations: sum(hash(_col2,_col3)) +mode: hash +outputColumnNames: _col0 +File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: +Map Reduce Local Work + + Stage: Stage-3 +Map Reduce + Map Operator Tree: + TableScan +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: +Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputC
[6/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/queries/clientpositive/vector_outer_join5.q -- diff --git a/ql/src/test/queries/clientpositive/vector_outer_join5.q b/ql/src/test/queries/clientpositive/vector_outer_join5.q new file mode 100644 index 000..b7ee4a4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_outer_join5.q @@ -0,0 +1,173 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=1; + +-- SORT_QUERY_RESULTS + +create table sorted_mod_4 stored as orc +as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc +where cint is not null and ctinyint is not null +order by ctinyint; + +ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS; +ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS; + +create table small_table stored +as orc as select ctinyint, cbigint from alltypesorc limit 100; + +ANALYZE TABLE small_table COMPUTE STATISTICS; +ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS; + +explain +select count(*) from (select s.*, st.* +from sorted_mod_4 s +left outer join small_table st +on s.ctinyint = st.ctinyint +) t1; + +select count(*) from (select s.*, st.* +from sorted_mod_4 s +left outer join small_table st +on s.ctinyint = st.ctinyint +) t1; + +explain +select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint +from sorted_mod_4 s +left outer join small_table sm +on s.ctinyint = sm.ctinyint and s.cmodint = 2 +) t1; + +select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint +from sorted_mod_4 s +left outer join small_table sm +on s.ctinyint = sm.ctinyint and s.cmodint = 2 +) t1; + +explain +select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint +from sorted_mod_4 s +left outer join small_table sm +on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint +) t1; + +select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint +from sorted_mod_4 s +left outer join small_table sm +on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint +) t1; + +explain +select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint +from sorted_mod_4 s +left outer join small_table sm +on s.ctinyint = sm.ctinyint and s.ctinyint < 100 +) t1; + +select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint +from sorted_mod_4 s +left outer join small_table sm +on s.ctinyint = sm.ctinyint and s.ctinyint < 100 +) t1; + +explain +select count(*) from (select s.*, sm.*, s2.* +from sorted_mod_4 s +left outer join small_table sm + on pmod(sm.cbigint, 8) = s.cmodint +left outer join sorted_mod_4 s2 + on s2.ctinyint = s.ctinyint +) t1; + +select count(*) from (select s.*, sm.*, s2.* +from sorted_mod_4 s +left outer join small_table sm + on pmod(sm.cbigint, 8) = s.cmodint +left outer join sorted_mod_4 s2 + on s2.ctinyint = s.ctinyint +) t1; + + +create table mod_8_mod_4 stored as orc +as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc +where cint is not null and ctinyint is not null; + +ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS; +ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS; + +create table small_table2 stored +as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100; + +ANALYZE TABLE small_table2 COMPUTE STATISTICS; +ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS; + +explain +select count(*) from (select s.*, st.* +from mod_8_mod_4 s +left outer join 
small_table2 st +on s.cmodtinyint = st.cmodtinyint +) t1; + +select count(*) from (select s.*, st.* +from mod_8_mod_4 s +left outer join small_table2 st +on s.cmodtinyint = st.cmodtinyint +) t1; + +explain +select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint +from mod_8_mod_4 s +left outer join small_table2 sm +on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 +) t1; + +select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint +from mod_8_mod_4 s +left outer join small_table2 sm +on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 +) t1; + +explain +select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint +from mod_8_mod_4 s +left outer join small_table2 sm +on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint +) t1; + +select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint +from mod_8_mod_4 s +left outer join small_table2 sm +on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint +) t1; + +explain +select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint +from mod_8_mod_4 s +left outer join small_table2 sm +on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 +) t1; + +select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint +from mod_8_mod_4 s +left outer join small_table2 sm +on s.cmodtinyint = sm.cmodtinyint and s.cmod
[1/8] hive git commit: HIVE-10559: IndexOutOfBoundsException with RemoveDynamicPruningBySize (Wei Zhang via Gunther Hagleitner)
Repository: hive Updated Branches: refs/heads/branch-1.2 7f237de44 -> d421201cc HIVE-10559: IndexOutOfBoundsException with RemoveDynamicPruningBySize (Wei Zhang via Gunther Hagleitner) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/878a282d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/878a282d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/878a282d Branch: refs/heads/branch-1.2 Commit: 878a282d5c51456e83f90df3044d900fe41af226 Parents: 7f237de Author: Gunther Hagleitner Authored: Thu May 14 15:48:34 2015 -0700 Committer: Gunther Hagleitner Committed: Fri May 15 14:35:12 2015 -0700 -- .../hive/ql/optimizer/ConvertJoinMapJoin.java | 25 ++-- .../optimizer/RemoveDynamicPruningBySize.java | 19 ++- .../hadoop/hive/ql/parse/GenTezUtils.java | 16 +++ .../hive/ql/parse/OptimizeTezProcContext.java | 14 +++ .../hadoop/hive/ql/parse/TezCompiler.java | 19 ++- .../dynamic_partition_pruning_2.q | 17 +++ .../tez/dynamic_partition_pruning_2.q.out | 116 +++ 7 files changed, 196 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/878a282d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index d42b643..bcffdbc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -636,13 +636,26 @@ public class ConvertJoinMapJoin implements NodeProcessor { // we might have generated a dynamic partition operator chain. Since // we're removing the reduce sink we need do remove that too. Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>(); +Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>(); for (Operator<?> c : p.getChildOperators()) { - if (hasDynamicPartitionBroadcast(c)) { + AppMasterEventOperator event = findDynamicPartitionBroadcast(c); + if (event != null) { dynamicPartitionOperators.add(c); +opEventPairs.put(c, event); } } for (Operator<?> c : dynamicPartitionOperators) { - p.removeChild(c); + if (context.pruningOpsRemovedByPriorOpt.isEmpty() || + !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) { +p.removeChild(c); +// at this point we've found the fork in the op pipeline that has the pruning as a child plan. +LOG.info("Disabling dynamic pruning for: " ++ ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName() ++ ". Need to be removed together with reduce sink"); + } +} +for (Operator<?> op : dynamicPartitionOperators) { + context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op)); } } mapJoinOp.getParentOperators().remove(bigTablePosition); @@ -662,15 +675,13 @@ public class ConvertJoinMapJoin implements NodeProcessor { return mapJoinOp; } - private boolean hasDynamicPartitionBroadcast(Operator<?> parent) { -boolean hasDynamicPartitionPruning = false; + private AppMasterEventOperator findDynamicPartitionBroadcast(Operator<?> parent) { for (Operator<?> op : parent.getChildOperators()) { while (op != null) { if (op instanceof AppMasterEventOperator && op.getConf() instanceof DynamicPruningEventDesc) { // found dynamic partition pruning operator - hasDynamicPartitionPruning = true; - break; + return (AppMasterEventOperator)op; } if (op instanceof ReduceSinkOperator || op instanceof FileSinkOperator) { // crossing reduce sink or file sink means the pruning isn't for this parent.
@@ -686,6 +697,6 @@ public class ConvertJoinMapJoin implements NodeProcessor { } } -return hasDynamicPartitionPruning; +return null; } } http://git-wip-us.apache.org/repos/asf/hive/blob/878a282d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java index 4803959..5d01311 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java @@ -24,1
[4/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out -- diff --git a/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out b/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out new file mode 100644 index 000..1e74446 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out @@ -0,0 +1,234 @@ +PREHOOK: query: explain +select +-- to timestamp + cast (ctinyint as timestamp) + ,cast (csmallint as timestamp) + ,cast (cint as timestamp) + ,cast (cbigint as timestamp) + ,cast (cfloat as timestamp) + ,cast (cdouble as timestamp) + ,cast (cboolean1 as timestamp) + ,cast (cbigint * 0 as timestamp) + ,cast (ctimestamp1 as timestamp) + ,cast (cstring1 as timestamp) + ,cast (substr(cstring1, 1, 1) as timestamp) +from alltypesorc +-- limit output to a reasonably small number of rows +where cbigint % 250 = 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +-- to timestamp + cast (ctinyint as timestamp) + ,cast (csmallint as timestamp) + ,cast (cint as timestamp) + ,cast (cbigint as timestamp) + ,cast (cfloat as timestamp) + ,cast (cdouble as timestamp) + ,cast (cboolean1 as timestamp) + ,cast (cbigint * 0 as timestamp) + ,cast (ctimestamp1 as timestamp) + ,cast (cstring1 as timestamp) + ,cast (substr(cstring1, 1, 1) as timestamp) +from alltypesorc +-- limit output to a reasonably small number of rows +where cbigint % 250 = 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 +Fetch Operator + limit: -1 + Processor Tree: +TableScan + alias: alltypesorc + Filter Operator +predicate: ((cbigint % 250) = 0) (type: boolean) +Select Operator + expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + ListSink + +PREHOOK: query: select +-- to timestamp + cast (ctinyint as timestamp) + ,cast (csmallint as timestamp) + ,cast (cint as timestamp) + ,cast (cbigint as timestamp) + ,cast (cfloat as timestamp) + ,cast (cdouble as timestamp) + ,cast (cboolean1 as timestamp) + ,cast (cbigint * 0 as timestamp) + ,cast (ctimestamp1 as timestamp) + ,cast (cstring1 as timestamp) + ,cast (substr(cstring1, 1, 1) as timestamp) +from alltypesorc +-- limit output to a reasonably small number of rows +where cbigint % 250 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc + A masked pattern was here +POSTHOOK: query: select +-- to timestamp + cast (ctinyint as timestamp) + ,cast (csmallint as timestamp) + ,cast (cint as timestamp) + ,cast (cbigint as timestamp) + ,cast (cfloat as timestamp) + ,cast (cdouble as timestamp) + ,cast (cboolean1 as timestamp) + ,cast (cbigint * 0 as timestamp) + ,cast (ctimestamp1 as timestamp) + ,cast (cstring1 as timestamp) + ,cast (substr(cstring1, 1, 1) as timestamp) +from alltypesorc +-- limit output to a reasonably small number of rows +where cbigint % 250 = 0 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@alltypesorc + A masked pattern was here +1969-12-31 15:59:59.9641969-12-31 15:59:59.8 NULL1969-12-08 10:43:03.25 1969-12-31 15:59:24 1969-12-31 15:56:40 NULL1969-12-31 16:00:00 1969-12-31 15:59:45.748 NULLNULL +1969-12-31 15:59:59.9641969-12-31 15:59:59.8 NULL1970-01-19 04:24:39 1969-12-31 15:59:24 1969-12-31 15:56:40 NULL1969-12-31 16:00:00 1969-12-31 15:59:53.817 NULLNULL +1969-12-31 15:59:59.97 1969-12-31 15:59:59.8 NULL1970-01-17 05:10:52.25 1969-12-31 15:59:30 1969-12-31 15:56:40 NULL1969-12-31 16:00:00 1969-12-31 16:00:12.935 NULLNULL +1969-12-31 15:59:59.949NULL1970-01-09 14:53:20.971 1970-01-12 20:45:23.25 1969-12-31 15:59:09 NULL1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:08.451 NULLNULL +1969-12-31 15:59:59.949NULL1970-01-09 07:39:13.882 1969-12-09 07:45:32.75 1969-12-31 15:59:09 NULL1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:08.451 NULLNULL +1969-12-31 16:00:00.02 1969-12-31 16:00:15.601 NULL1969-12-27 11:19:26.75 1969-12-31 16:00:20 1969-12-31
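A note on the otherwise puzzling values above: they are consistent with the vectorized cast reading the integer as epoch milliseconds and rendering it in the test's US/Pacific timezone. That millisecond interpretation is inferred from the output, not stated by the commit. For example, a ctinyint of -36:

    import java.sql.Timestamp;
    import java.util.TimeZone;

    public class IntToTimestampSketch {
      public static void main(String[] args) {
        // The q-file results are rendered in Pacific time.
        TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"));
        // tinyint -36, read as -36 milliseconds since the epoch:
        System.out.println(new Timestamp(-36L)); // 1969-12-31 15:59:59.964
      }
    }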
[5/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out -- diff --git a/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out new file mode 100644 index 000..2243072 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out @@ -0,0 +1,195 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE myinput1_txt(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE myinput1_txt(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@myinput1_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@myinput1_txt +PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@myinput1_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1 +POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@myinput1_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1 +Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +13630578 +Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +13630578 +Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Map 1' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +13630578 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +4509856 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +3112070 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 + A masked pattern was here +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 + A masked pattern was here +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTE
[7/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index 37ccf22..f971727 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -24,7 +24,9 @@ import java.util.Arrays; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -123,13 +125,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe batchCounter++; - // Do the per-batch setup for an outer join. - - outerPerBatchSetup(batch); - - // For outer join, DO NOT apply filters yet. It is incorrect for outer join to - // apply the filter before hash table matching. - final int inputLogicalSize = batch.size; if (inputLogicalSize == 0) { @@ -139,6 +134,44 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe return; } + // Do the per-batch setup for an outer join. + + outerPerBatchSetup(batch); + + // For outer join, remember our input rows before ON expression filtering or before + // hash table matching so we can generate results for all rows (matching and non matching) + // later. + boolean inputSelectedInUse = batch.selectedInUse; + if (inputSelectedInUse) { +// if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { +// throw new HiveException("batch.selected is not in sort order and unique"); +// } +System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); + } + + // Filtering for outer join just removes rows available for hash table matching. + boolean someRowsFilteredOut = false; + if (bigTableFilterExpressions.length > 0) { +// Since the input +for (VectorExpression ve : bigTableFilterExpressions) { + ve.evaluate(batch); +} +someRowsFilteredOut = (batch.size != inputLogicalSize); +if (LOG.isDebugEnabled()) { + if (batch.selectedInUse) { +if (inputSelectedInUse) { + LOG.debug(CLASS_NAME + + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); +} else { + LOG.debug(CLASS_NAME + +" inputLogicalSize " + inputLogicalSize + +" filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); +} + } +} + } + // Perform any key expressions. Results will go into scratch columns. if (bigTableKeyExpressions != null) { for (VectorExpression ve : bigTableKeyExpressions) { @@ -146,9 +179,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe } } - // We rebuild in-place the selected array with rows destine to be forwarded. - int numSel = 0; - /* * Single-Column Long specific declarations. 
*/ @@ -178,12 +208,16 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe */ JoinUtil.JoinResult joinResult; -if (!joinColVector.noNulls && joinColVector.isNull[0]) { - // Null key is no match for whole batch. +if (batch.size == 0) { + // Whole repeated key batch was filtered out. + joinResult = JoinUtil.JoinResult.NOMATCH; +} else if (!joinColVector.noNulls && joinColVector.isNull[0]) { + // Any (repeated) null key column is no match for whole batch. joinResult = JoinUtil.JoinResult.NOMATCH; } else { // Handle *repeated* join key, if found. long key = vector[0]; + // LOG.debug(CLASS_NAME + " repeated key " + key); if (useMinMax && (key < min || key > max)) { // Out of range for whole batch. joinResult = JoinUtil.JoinResult.NOMATCH; @@ -199,7 +233,8 @@ public class VectorMapJoinOuterLongOperator extends Vec
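The essence of HIVE-10565: for a LEFT OUTER JOIN, ON-clause filtering only narrows the rows eligible for hash-table matching; every input row must still produce output, so the operator snapshots batch.selected into inputSelected before evaluating the filter expressions (and, per the hunk above, treats a repeated-key batch whose rows were all filtered out as NOMATCH rather than skipping it). A minimal standalone sketch of that bookkeeping, with hypothetical names and plain arrays standing in for VectorizedRowBatch:

import java.util.Arrays;

public final class OuterJoinSelectedSnapshot {

  public static void main(String[] args) {
    long[] keyColumn = {10L, 20L, 30L, 40L}; // big-table join key column
    int[] selected = {0, 1, 2, 3};           // batch.selected
    int size = selected.length;              // batch.size

    // The fix: remember the input rows before any ON-clause filtering.
    int[] inputSelected = Arrays.copyOf(selected, size);

    // Filtering (here: keep keys > 15) rebuilds the selected prefix in
    // place; it restricts matching only, never the outer-join output.
    int filteredSize = 0;
    for (int i = 0; i < size; i++) {
      if (keyColumn[selected[i]] > 15L) {
        selected[filteredSize++] = selected[i];
      }
    }
    boolean someRowsFilteredOut = (filteredSize != size);

    // Every original row still gets a result: surviving rows are probed
    // against the hash table, filtered-out rows are NULL-extended.
    for (int row : inputSelected) {
      boolean eligible = Arrays.stream(selected, 0, filteredSize)
          .anyMatch(r -> r == row);
      System.out.println("row " + row + (eligible
          ? " -> probe hash table"
          : " -> emit big-table row with NULL small-table side"));
    }
    System.out.println("someRowsFilteredOut = " + someRowsFilteredOut);
  }
}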
hive git commit: HIVE-10714: Bloom filter column names specification should be case insensitive (Prasanth Jayachandran reviewed by Gopal V)
Repository: hive Updated Branches: refs/heads/master f82c0c20c -> 3056e7aa8 HIVE-10714: Bloom filter column names specification should be case insensitive (Prasanth Jayachandran reviewed by Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3056e7aa Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3056e7aa Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3056e7aa Branch: refs/heads/master Commit: 3056e7aa856a6b591cf5c19743579f0991a477af Parents: f82c0c2 Author: Prasanth Jayachandran Authored: Fri May 15 11:22:51 2015 -0700 Committer: Prasanth Jayachandran Committed: Fri May 15 11:22:51 2015 -0700 -- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java | 2 +- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/3056e7aa/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java index ba59b35..0776018 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java @@ -58,7 +58,7 @@ public class OrcUtils { int numFlattenedCols = getFlattenedColumnsCount(inspector); boolean[] results = new boolean[numFlattenedCols]; if (selectedColumns != null && !selectedColumns.isEmpty()) { - includeColumnsImpl(results, selectedColumns, allColumns, inspector); + includeColumnsImpl(results, selectedColumns.toLowerCase(), allColumns, inspector); } return results; } http://git-wip-us.apache.org/repos/asf/hive/blob/3056e7aa/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java index b56c1ca..5b200f1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java @@ -316,7 +316,7 @@ public class TestFileDump { .compress(CompressionKind.ZLIB) .bufferSize(1) .rowIndexStride(1000) -.bloomFilterColumns("s"); +.bloomFilterColumns("S"); Writer writer = OrcFile.createWriter(testFilePath, options); Random r1 = new Random(1); String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
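The whole fix is the one-line normalization above: the user-supplied bloom filter column list is lower-cased before it is matched against the schema's column names, so bloomFilterColumns("S") and bloomFilterColumns("s") now select the same column. A self-contained sketch of that matching with hypothetical names (the real logic lives in OrcUtils.includeColumnsImpl; this sketch assumes schema column names are kept lower-cased):

import java.util.Arrays;
import java.util.List;

public final class BloomFilterColumnMatch {

  // Hypothetical stand-in for OrcUtils.includeColumns/includeColumnsImpl.
  static boolean[] includeColumns(String selectedColumns, List<String> allColumns) {
    boolean[] results = new boolean[allColumns.size()];
    if (selectedColumns != null && !selectedColumns.isEmpty()) {
      // The fix: normalize the user's comma-separated list once, up front.
      List<String> wanted =
          Arrays.asList(selectedColumns.toLowerCase().split(","));
      for (int i = 0; i < allColumns.size(); i++) {
        results[i] = wanted.contains(allColumns.get(i).toLowerCase());
      }
    }
    return results;
  }

  public static void main(String[] args) {
    // "S" now selects column "s", the case the TestFileDump change exercises.
    System.out.println(Arrays.toString(
        includeColumns("S", Arrays.asList("s", "i", "d"))));
    // prints [true, false, false]
  }
}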
hive git commit: HIVE-10641 create CRC32 UDF (Alexander Pivovarov, reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/master 84a2235c5 -> f82c0c20c HIVE-10641 create CRC32 UDF (Alexander Pivovarov, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f82c0c20 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f82c0c20 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f82c0c20 Branch: refs/heads/master Commit: f82c0c20c5038f54b4a24561c943ca646a272d18 Parents: 84a2235 Author: Alexander Pivovarov Authored: Wed May 6 23:14:29 2015 -0700 Committer: Alexander Pivovarov Committed: Fri May 15 10:27:26 2015 -0700 -- .../hadoop/hive/ql/exec/FunctionRegistry.java | 2 + .../org/apache/hadoop/hive/ql/udf/UDFCrc32.java | 75 .../apache/hadoop/hive/ql/udf/TestUDFCrc32.java | 74 +++ ql/src/test/queries/clientpositive/udf_crc32.q | 13 .../results/clientpositive/show_functions.q.out | 2 + .../test/results/clientpositive/udf_crc32.q.out | 60 6 files changed, 226 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 606185c..7ce0a1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.udf.UDFBase64; import org.apache.hadoop.hive.ql.udf.UDFBin; import org.apache.hadoop.hive.ql.udf.UDFConv; import org.apache.hadoop.hive.ql.udf.UDFCos; +import org.apache.hadoop.hive.ql.udf.UDFCrc32; import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; import org.apache.hadoop.hive.ql.udf.UDFDegrees; import org.apache.hadoop.hive.ql.udf.UDFE; @@ -219,6 +220,7 @@ public final class FunctionRegistry { system.registerUDF("tan", UDFTan.class, false); system.registerUDF("e", UDFE.class, false); system.registerGenericUDF("factorial", GenericUDFFactorial.class); +system.registerUDF("crc32", UDFCrc32.class, false); system.registerUDF("conv", UDFConv.class, false); system.registerUDF("bin", UDFBin.class, false); http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java new file mode 100644 index 000..c1f0e38 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf; + +import java.util.zip.CRC32; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +/** + * UDFCrc32. + * + */ +@Description(name = "crc32", +value = "_FUNC_(str or bin) - Computes a cyclic redundancy check value " ++ "for string or binary argument and returns bigint value.", +extended = "Example:\n" ++ " > SELECT _FUNC_('ABC');\n" ++ " 2743272264\n" ++ " > SELECT _FUNC_(binary('ABC'));\n" ++ " 2743272264") +public class UDFCrc32 extends UDF { + + private final LongWritable result = new LongWritable(); + private final CRC32 crc32 = new CRC32(); + + /** + * CRC32 for string + */ + public LongWritable evaluate(Text n) { +if (n == null) { + return null; +} + +crc32.reset(); +crc32.update(n.getBytes(), 0, n.getLength()); + +result.set(crc32.getValue()); +return result; + } + + /** + * CRC32 for binary + */ + public LongWritable evaluate(BytesWritable b) { +if (b == null) { + return null; +
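The UDF is a thin wrapper over java.util.zip.CRC32: one instance is held per UDF object and reset() per row, and update(bytes, offset, length) hashes only the Writable's valid byte range, so no per-row allocation is needed. The same computation as a standalone sketch (Crc32Demo is a hypothetical name):

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

public final class Crc32Demo {
  public static void main(String[] args) {
    CRC32 crc32 = new CRC32();
    byte[] bytes = "ABC".getBytes(StandardCharsets.UTF_8);
    crc32.reset();                         // safe to reuse across rows
    crc32.update(bytes, 0, bytes.length);  // hash only the valid range
    System.out.println(crc32.getValue());  // 2743272264, per the @Description
  }
}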
hive git commit: HIVE-10639 create SHA1 UDF (Alexander Pivovarov, reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/master bf7810ab6 -> 84a2235c5 HIVE-10639 create SHA1 UDF (Alexander Pivovarov, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/84a2235c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/84a2235c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/84a2235c Branch: refs/heads/master Commit: 84a2235c5590944120bb9319565f5e113b6275a1 Parents: bf7810a Author: Alexander Pivovarov Authored: Wed May 6 20:13:22 2015 -0700 Committer: Alexander Pivovarov Committed: Fri May 15 10:23:27 2015 -0700 -- .../hadoop/hive/ql/exec/FunctionRegistry.java | 3 + .../org/apache/hadoop/hive/ql/udf/UDFSha1.java | 88 .../apache/hadoop/hive/ql/udf/TestUDFSha1.java | 57 + ql/src/test/queries/clientpositive/udf_sha1.q | 13 +++ .../results/clientpositive/show_functions.q.out | 2 + .../test/results/clientpositive/udf_sha1.q.out | 61 ++ 6 files changed, 224 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 02a604f..606185c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -86,6 +86,7 @@ import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace; import org.apache.hadoop.hive.ql.udf.UDFRepeat; import org.apache.hadoop.hive.ql.udf.UDFReverse; import org.apache.hadoop.hive.ql.udf.UDFSecond; +import org.apache.hadoop.hive.ql.udf.UDFSha1; import org.apache.hadoop.hive.ql.udf.UDFSign; import org.apache.hadoop.hive.ql.udf.UDFSin; import org.apache.hadoop.hive.ql.udf.UDFSpace; @@ -226,6 +227,8 @@ public final class FunctionRegistry { system.registerUDF("base64", UDFBase64.class, false); system.registerUDF("unbase64", UDFUnbase64.class, false); system.registerUDF("md5", UDFMd5.class, false); +system.registerUDF("sha1", UDFSha1.class, false); +system.registerUDF("sha", UDFSha1.class, false); system.registerGenericUDF("encode", GenericUDFEncode.class); system.registerGenericUDF("decode", GenericUDFDecode.class); http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java new file mode 100644 index 000..04e6f81 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +import org.apache.commons.codec.binary.Hex; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; + +/** + * UDFSha. + * + */ +@Description(name = "sha1,sha", +value = "_FUNC_(str or bin) - Calculates the SHA-1 digest for string or binary " ++ "and returns the value as a hex string.", +extended = "Example:\n" ++ " > SELECT _FUNC_('ABC');\n" ++ " '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'\n" ++ " > SELECT _FUNC_(binary('ABC'));\n" ++ " '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'") +public class UDFSha1 extends UDF { + + private final Text result = new Text(); + private final MessageDigest digest; + + public UDFSha1() { +try { + digest = MessageDigest.getInstance("SHA"); +} catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); +} + } + + /** + * Convert String to SHA-1 + */ + publi
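As with the CRC32 UDF, the MessageDigest is created once in the constructor and reused per row; the JDK algorithm name "SHA" requested there is an alias for SHA-1, which is why the function registers under both sha1 and sha. A standalone sketch of the same hex digest, using a plain JDK loop in place of commons-codec's Hex (Sha1Demo is a hypothetical name):

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public final class Sha1Demo {
  public static void main(String[] args) throws NoSuchAlgorithmException {
    MessageDigest digest = MessageDigest.getInstance("SHA"); // JDK alias for SHA-1
    byte[] sha1 = digest.digest("ABC".getBytes(StandardCharsets.UTF_8));
    StringBuilder hex = new StringBuilder(sha1.length * 2);
    for (byte b : sha1) {
      hex.append(String.format("%02x", b)); // lower-case hex, like Hex.encodeHexString
    }
    System.out.println(hex); // 3c01bdbb26f358bab27f267924aa2c9a03fcfdb8, per the @Description
  }
}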
hive git commit: HIVE-10657 Remove copyBytes operation from MD5 UDF (Alexander Pivovarov, reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/master ad62e2ede -> bf7810ab6 HIVE-10657 Remove copyBytes operation from MD5 UDF (Alexander Pivovarov, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bf7810ab Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bf7810ab Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bf7810ab Branch: refs/heads/master Commit: bf7810ab64caabc47d4370d04f80c0c1f182426a Parents: ad62e2e Author: Alexander Pivovarov Authored: Fri May 8 11:36:47 2015 -0700 Committer: Alexander Pivovarov Committed: Fri May 15 10:20:32 2015 -0700 -- .../org/apache/hadoop/hive/ql/udf/UDFMd5.java | 33 +--- 1 file changed, 21 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/bf7810ab/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java index 62c16c2..14928ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.udf; -import org.apache.commons.codec.digest.DigestUtils; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +import org.apache.commons.codec.binary.Hex; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.io.BytesWritable; @@ -39,6 +42,15 @@ import org.apache.hadoop.io.Text; public class UDFMd5 extends UDF { private final Text result = new Text(); + private final MessageDigest digest; + + public UDFMd5() { +try { + digest = MessageDigest.getInstance("MD5"); +} catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); +} + } /** * Convert String to md5 @@ -48,8 +60,10 @@ public class UDFMd5 extends UDF { return null; } -String str = n.toString(); -String md5Hex = DigestUtils.md5Hex(str); +digest.reset(); +digest.update(n.getBytes(), 0, n.getLength()); +byte[] md5Bytes = digest.digest(); +String md5Hex = Hex.encodeHexString(md5Bytes); result.set(md5Hex); return result; @@ -63,17 +77,12 @@ public class UDFMd5 extends UDF { return null; } -byte[] bytes = copyBytes(b); -String md5Hex = DigestUtils.md5Hex(bytes); +digest.reset(); +digest.update(b.getBytes(), 0, b.getLength()); +byte[] md5Bytes = digest.digest(); +String md5Hex = Hex.encodeHexString(md5Bytes); result.set(md5Hex); return result; } - - protected byte[] copyBytes(BytesWritable b) { -int size = b.getLength(); -byte[] result = new byte[size]; -System.arraycopy(b.getBytes(), 0, result, 0, size); -return result; - } }
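The old binary overload needed copyBytes() because BytesWritable.getBytes() returns a padded backing array while DigestUtils.md5Hex(byte[]) digests the whole array; switching to MessageDigest.update(bytes, 0, length) hashes the valid range in place and drops one byte[] allocation per row. A standalone sketch of the new pattern against a hypothetical padded buffer (Md5NoCopyDemo is an invented name):

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public final class Md5NoCopyDemo {
  public static void main(String[] args) throws NoSuchAlgorithmException {
    // BytesWritable-style storage: only the first 'length' bytes are valid,
    // so hashing the backing array wholesale would give a wrong digest.
    byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
    byte[] backing = new byte[64];
    System.arraycopy(data, 0, backing, 0, data.length);
    int length = data.length;

    MessageDigest digest = MessageDigest.getInstance("MD5");
    digest.reset();
    digest.update(backing, 0, length);  // the fix: no per-row copy
    byte[] md5Bytes = digest.digest();

    StringBuilder hex = new StringBuilder(md5Bytes.length * 2);
    for (byte b : md5Bytes) {
      hex.append(String.format("%02x", b));
    }
    System.out.println(hex);  // the MD5 of "ABC" as lower-case hex
  }
}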