[2/2] hive git commit: HIVE-10636 : CASE comparison operator rotation optimization (Ashutosh Chauhan via Gopal V)

2015-05-15 Thread hashutosh
HIVE-10636 : CASE comparison operator rotation optimization (Ashutosh Chauhan 
via Gopal V)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b1ad20f9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b1ad20f9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b1ad20f9

Branch: refs/heads/branch-1.2
Commit: b1ad20f9404f01dfc6ea95ad0af614901d4cb4b3
Parents: 5984bec
Author: Ashutosh Chauhan 
Authored: Tue May 12 14:58:06 2015 -0700
Committer: Ashutosh Chauhan 
Committed: Fri May 15 18:31:38 2015 -0700
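
For context: the new fold_eq_with_case_when.q test added below exercises the rewrite this
patch introduces, which folds an equality against a CASE over constants down to a plain
predicate. A hypothetical HiveQL sketch of the idea (table name t is invented; the
authoritative rules are the ConstantPropagateProcFactory changes in this diff):

-- Before: the CASE result is computed and compared on every row.
SELECT key FROM t
WHERE (CASE WHEN key = 0 THEN 'a' ELSE 'b' END) = 'a';

-- After rotating the comparison onto the branch condition, the CASE
-- disappears and the same rows are selected.
SELECT key FROM t
WHERE key = 0;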

--
 .../hive/ql/exec/ExprNodeEvaluatorFactory.java  |   7 -
 .../ql/exec/vector/VectorizationContext.java|  13 +-
 .../optimizer/ConstantPropagateProcFactory.java |  86 +--
 .../hive/ql/optimizer/GroupByOptimizer.java |   7 +-
 .../PrunerExpressionOperatorFactory.java|   3 -
 .../hive/ql/optimizer/SimpleFetchOptimizer.java |   2 -
 .../calcite/translator/RexNodeConverter.java|   5 +-
 .../ql/optimizer/lineage/ExprProcFactory.java   |   3 +-
 .../ql/optimizer/pcr/PcrExprProcFactory.java|   3 +-
 .../hive/ql/parse/TableAccessAnalyzer.java  |   6 +-
 .../hive/ql/parse/TypeCheckProcFactory.java |   4 +-
 .../hive/ql/plan/ExprNodeConstantDesc.java  |  14 +-
 .../hadoop/hive/ql/plan/ExprNodeNullDesc.java   |  69 --
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  12 +-
 .../hive/ql/udf/generic/GenericUDFCoalesce.java |   2 +-
 .../hive/ql/udf/generic/GenericUDFGreatest.java |   1 +
 .../hive/ql/udf/generic/GenericUDFInstr.java|   2 +-
 .../hive/ql/udf/generic/GenericUDFLocate.java   |   2 +-
 .../hive/ql/udf/generic/GenericUDFPrintf.java   |   3 +-
 .../ql/udf/generic/GenericUDFTranslate.java |   8 +-
 .../hive/ql/udf/generic/GenericUDFUtils.java|   6 +-
 .../clientpositive/fold_eq_with_case_when.q |  21 ++
 .../clientpositive/annotate_stats_filter.q.out  |   4 +-
 .../clientpositive/fold_eq_with_case_when.q.out | 231 +++
 .../test/results/clientpositive/fold_when.q.out |   2 +-
 ql/src/test/results/clientpositive/input6.q.out |   2 +-
 .../results/clientpositive/join_nullsafe.q.out  |  10 +-
 .../clientpositive/spark/join_nullsafe.q.out|  10 +-
 .../subquery_notin_having.q.java1.7.out |   2 +-
 .../clientpositive/tez/join_nullsafe.q.out  |  14 +-
 .../clientpositive/tez/vector_coalesce.q.out|  26 +--
 .../clientpositive/vector_coalesce.q.out|   6 +-
 .../hive/serde2/io/HiveVarcharWritable.java |   7 +-
 .../ObjectInspectorConverters.java  |   5 +-
 .../objectinspector/ObjectInspectorUtils.java   |   4 +-
 .../AbstractPrimitiveObjectInspector.java   |   1 +
 .../primitive/WritableVoidObjectInspector.java  |   5 +
 37 files changed, 396 insertions(+), 212 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/b1ad20f9/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
index a149571..f08321c 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
@@ -27,7 +27,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.NullWritable;
 
@@ -57,12 +56,6 @@ public final class ExprNodeEvaluatorFactory {
 if (desc instanceof ExprNodeFieldDesc) {
   return new ExprNodeFieldEvaluator((ExprNodeFieldDesc) desc);
 }
-// Null node, a constant node with value NULL and no type information
-if (desc instanceof ExprNodeNullDesc) {
-  return new ExprNodeConstantEvaluator(new ExprNodeConstantDesc(TypeInfoFactory
-  .getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class), null));
-}
-
 throw new RuntimeException(
 "Cannot find ExprNodeEvaluator for the exprNodeDesc = " + desc);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/b1ad20f9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index df39218..48f34a9 100644
--- 
a/ql/src/java/org/apache/hadoop/hive

[1/2] hive git commit: HIVE-10325 : Remove ExprNodeNullEvaluator (Ashutosh Chauhan via Gopal V)

2015-05-15 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/branch-1.2 d421201cc -> b1ad20f94


HIVE-10325 : Remove ExprNodeNullEvaluator (Ashutosh Chauhan via Gopal V)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5984bec5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5984bec5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5984bec5

Branch: refs/heads/branch-1.2
Commit: 5984bec5c844f23d98b5e0da42d4784a83eb93ac
Parents: d421201
Author: Ashutosh Chauhan 
Authored: Tue Apr 14 13:46:50 2015 -0700
Committer: Ashutosh Chauhan 
Committed: Fri May 15 18:31:10 2015 -0700

--
 .../hive/ql/exec/ExprNodeEvaluatorFactory.java  | 11 +++--
 .../ql/exec/ExprNodeGenericFuncEvaluator.java   |  3 +-
 .../hive/ql/exec/ExprNodeNullEvaluator.java | 47 
 .../hive/ql/plan/ExprNodeConstantDesc.java  |  1 -
 4 files changed, 9 insertions(+), 53 deletions(-)
--
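
In short: the dedicated null evaluator is deleted and a NULL literal is now evaluated as an
ordinary constant carrying a void type. A minimal Java sketch mirroring the + lines of the
diff below (the wrapper class and method are illustrative, not part of the patch):

import org.apache.hadoop.hive.ql.exec.ExprNodeConstantEvaluator;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.NullWritable;

public class NullEvaluatorSketch {
  // Builds the evaluator that replaces ExprNodeNullEvaluator: a constant
  // evaluator over a void-typed ExprNodeConstantDesc whose value is null.
  public static ExprNodeConstantEvaluator nullEvaluator() {
    return new ExprNodeConstantEvaluator(new ExprNodeConstantDesc(
        TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class),
        null));
  }
}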


http://git-wip-us.apache.org/repos/asf/hive/blob/5984bec5/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
index ff0ddc8..a149571 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.NullWritable;
 
 /**
  * ExprNodeEvaluatorFactory.
@@ -57,7 +59,8 @@ public final class ExprNodeEvaluatorFactory {
 }
 // Null node, a constant node with value NULL and no type information
 if (desc instanceof ExprNodeNullDesc) {
-  return new ExprNodeNullEvaluator((ExprNodeNullDesc) desc);
+  return new ExprNodeConstantEvaluator(new ExprNodeConstantDesc(TypeInfoFactory
+  .getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class), null));
 }
 
 throw new RuntimeException(
@@ -114,14 +117,14 @@ public final class ExprNodeEvaluatorFactory {
 
   private static class EvaluatorContext {
 
-private final Map<ExprNodeDesc.ExprNodeDescEqualityWrapper, ExprNodeEvaluator> cached =
+private final Map<ExprNodeDesc.ExprNodeDescEqualityWrapper, ExprNodeEvaluator> cached =
 new HashMap<ExprNodeDesc.ExprNodeDescEqualityWrapper, ExprNodeEvaluator>();
 
 private boolean hasReference;
 
 public ExprNodeEvaluator getEvaluated(ExprNodeEvaluator eval) {
-  ExprNodeDesc.ExprNodeDescEqualityWrapper key = 
-  new ExprNodeDesc.ExprNodeDescEqualityWrapper(eval.expr); 
+  ExprNodeDesc.ExprNodeDescEqualityWrapper key =
+  new ExprNodeDesc.ExprNodeDescEqualityWrapper(eval.expr);
   ExprNodeEvaluator prev = cached.get(key);
   if (prev == null) {
 cached.put(key, eval);

http://git-wip-us.apache.org/repos/asf/hive/blob/5984bec5/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java
index b695bef..b09b706 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java
@@ -78,9 +78,10 @@ public class ExprNodeGenericFuncEvaluator extends ExprNodeEvaluator

http://git-wip-us.apache.org/repos/asf/hive/blob/5984bec5/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java
deleted file mode 100644
index 3aaf17c..000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeNullEvaluator.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on a

[1/2] hive git commit: Revert "HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)"

2015-05-15 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/llap 41178e39d -> 0db5368c0


Revert "HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)"

This reverts commit 41178e39ddc5d43ff4abe0b147fd902617944aa3.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6701aa04
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6701aa04
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6701aa04

Branch: refs/heads/llap
Commit: 6701aa04ef1d194369f702f88eb4b47123f42958
Parents: 41178e3
Author: Prasanth Jayachandran 
Authored: Fri May 15 15:27:20 2015 -0700
Committer: Prasanth Jayachandran 
Committed: Fri May 15 15:27:20 2015 -0700

--
 .../hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java  | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/6701aa04/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
--
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
index 36f2246..b16a5c4 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
@@ -26,6 +26,7 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.hadoop.conf.Configuration;
@@ -211,7 +212,7 @@ public class TaskRunnerCallable extends CallableWithNdc {
   // TODO Fix UGI and FS Handling. Closing UGI here causes some errors right now.
   //FileSystem.closeAllForUGI(taskUgi);
   LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
-  runtimeWatch.stop().elapsedMillis());
+  runtimeWatch.stop().elapsed(TimeUnit.MILLISECONDS));
   if (LOG.isDebugEnabled()) {
 LOG.debug("canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
   }
@@ -376,10 +377,10 @@ public class TaskRunnerCallable extends CallableWithNdc {
   LOG.info("Killed task {}", requestId);
   if (killtimerWatch.isRunning()) {
 killtimerWatch.stop();
-long elapsed = killtimerWatch.elapsedMillis();
+long elapsed = killtimerWatch.elapsed(TimeUnit.MILLISECONDS);
 LOG.info("Time to die for task {}", elapsed);
   }
-  metrics.incrPreemptionTimeLost(runtimeWatch.elapsedMillis());
+  metrics.incrPreemptionTimeLost(runtimeWatch.elapsed(TimeUnit.MILLISECONDS));
   metrics.incrExecutorTotalKilled();
   break;
 case COMMUNICATION_FAILURE:



svn commit: r9016 - in /release/hive: ./ hive-1.2.0/

2015-05-15 Thread vikram
Author: vikram
Date: Fri May 15 22:36:58 2015
New Revision: 9016

Log:
Add hive-1.2.0 to svn dist

Added:
release/hive/hive-1.2.0/
release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz   (with props)
release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.asc   (with props)
release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.md5
release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz   (with props)
release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.asc   (with props)
release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.md5
Modified:
release/hive/stable

Added: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz
==
Binary file - no diff available.

Propchange: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz
--
svn:mime-type = application/x-gzip

Added: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.asc
==
Binary file - no diff available.

Propchange: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.asc
--
svn:mime-type = application/pgp-signature

Added: release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.md5
==
--- release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.md5 (added)
+++ release/hive/hive-1.2.0/apache-hive-1.2.0-bin.tar.gz.md5 Fri May 15 22:36:58 2015
@@ -0,0 +1 @@
+17871eea4d087695ac5d0d03386e4ec2  apache-hive-1.2.0-bin.tar.gz

Added: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz
==
Binary file - no diff available.

Propchange: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz
--
svn:mime-type = application/x-gzip

Added: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.asc
==
Binary file - no diff available.

Propchange: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.asc
--
svn:mime-type = application/pgp-signature

Added: release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.md5
==
--- release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.md5 (added)
+++ release/hive/hive-1.2.0/apache-hive-1.2.0-src.tar.gz.md5 Fri May 15 22:36:58 2015
@@ -0,0 +1 @@
+55f3bf70ec40041ecad8dcdc0435d6b6  apache-hive-1.2.0-src.tar.gz

Modified: release/hive/stable
==
--- release/hive/stable (original)
+++ release/hive/stable Fri May 15 22:36:58 2015
@@ -1 +1 @@
-link hive-1.1.0
\ No newline at end of file
+link hive-1.2.0
\ No newline at end of file




[2/2] hive git commit: HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)

2015-05-15 Thread prasanthj
HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0db5368c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0db5368c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0db5368c

Branch: refs/heads/llap
Commit: 0db5368c0cc0f4329916a72a11c9d99a02971bac
Parents: 6701aa0
Author: Prasanth Jayachandran 
Authored: Fri May 15 15:36:08 2015 -0700
Committer: Prasanth Jayachandran 
Committed: Fri May 15 15:36:08 2015 -0700

--
 .../hive/llap/daemon/impl/TaskRunnerCallable.java  | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0db5368c/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
--
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
index b16a5c4..72fcf3b 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
@@ -26,7 +26,6 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.hadoop.conf.Configuration;
@@ -59,6 +58,9 @@ import org.apache.tez.runtime.internals.api.TaskReporterInterface;
 import org.apache.tez.runtime.library.input.UnorderedKVInput;
 import org.apache.tez.runtime.task.EndReason;
 import org.apache.tez.runtime.task.TaskRunner2Result;
+import org.apache.tez.runtime.task.TezTaskRunner2;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Stopwatch;
 import com.google.common.collect.HashMultimap;
@@ -67,9 +69,6 @@ import com.google.common.util.concurrent.FutureCallback;
 import com.google.common.util.concurrent.ListeningExecutorService;
 import com.google.common.util.concurrent.MoreExecutors;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import org.apache.tez.runtime.task.TezTaskRunner2;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  *
@@ -212,7 +211,7 @@ public class TaskRunnerCallable extends CallableWithNdc {
   // TODO Fix UGI and FS Handling. Closing UGI here causes some errors right now.
   //FileSystem.closeAllForUGI(taskUgi);
   LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
-  runtimeWatch.stop().elapsed(TimeUnit.MILLISECONDS));
+  runtimeWatch.stop().elapsedMillis());
   if (LOG.isDebugEnabled()) {
 LOG.debug("canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
   }
@@ -377,10 +376,10 @@ public class TaskRunnerCallable extends CallableWithNdc {
   LOG.info("Killed task {}", requestId);
   if (killtimerWatch.isRunning()) {
 killtimerWatch.stop();
-long elapsed = killtimerWatch.elapsed(TimeUnit.MILLISECONDS);
+long elapsed = killtimerWatch.elapsedMillis();
 LOG.info("Time to die for task {}", elapsed);
   }
-  metrics.incrPreemptionTimeLost(runtimeWatch.elapsed(TimeUnit.MILLISECONDS));
+  metrics.incrPreemptionTimeLost(runtimeWatch.elapsedMillis());
   metrics.incrExecutorTotalKilled();
   break;
 case COMMUNICATION_FAILURE:



hive git commit: HIVE-10670: Duplicate declaration of curator-recipes at pom.xml (Hari via Xuefu)

2015-05-15 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 3056e7aa8 -> f106730ba


HIVE-10670: Duplicate declaration of curator-recipes at pom.xml (Hari via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f106730b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f106730b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f106730b

Branch: refs/heads/master
Commit: f106730ba8d6d65265ad25690e4e8056a7053155
Parents: 3056e7a
Author: Xuefu Zhang 
Authored: Fri May 15 15:13:15 2015 -0700
Committer: Xuefu Zhang 
Committed: Fri May 15 15:13:15 2015 -0700

--
 pom.xml | 5 -
 1 file changed, 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f106730b/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 2e4ca36..08d6dc3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -502,11 +502,6 @@
 <version>${curator.version}</version>
   </dependency>
   <dependency>
-<groupId>org.apache.curator</groupId>
-<artifactId>curator-recipes</artifactId>
-<version>${curator.version}</version>
-  </dependency>
-  <dependency>
 <groupId>org.codehaus.groovy</groupId>
 <artifactId>groovy-all</artifactId>
 <version>${groovy.version}</version>



hive git commit: HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)

2015-05-15 Thread sseth
Repository: hive
Updated Branches:
  refs/heads/llap d567cc445 -> 41178e39d


HIVE-10730. LLAP: fix guava stopwatch conflict. (Siddharth Seth)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/41178e39
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/41178e39
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/41178e39

Branch: refs/heads/llap
Commit: 41178e39ddc5d43ff4abe0b147fd902617944aa3
Parents: d567cc4
Author: Siddharth Seth 
Authored: Fri May 15 14:59:07 2015 -0700
Committer: Siddharth Seth 
Committed: Fri May 15 14:59:07 2015 -0700

--
 .../hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java  | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/41178e39/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
--
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
index b16a5c4..36f2246 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
@@ -26,7 +26,6 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.hadoop.conf.Configuration;
@@ -212,7 +211,7 @@ public class TaskRunnerCallable extends CallableWithNdc {
   // TODO Fix UGI and FS Handling. Closing UGI here causes some errors right now.
   //FileSystem.closeAllForUGI(taskUgi);
   LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
-  runtimeWatch.stop().elapsed(TimeUnit.MILLISECONDS));
+  runtimeWatch.stop().elapsedMillis());
   if (LOG.isDebugEnabled()) {
 LOG.debug("canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
   }
@@ -377,10 +376,10 @@ public class TaskRunnerCallable extends CallableWithNdc {
   LOG.info("Killed task {}", requestId);
   if (killtimerWatch.isRunning()) {
 killtimerWatch.stop();
-long elapsed = killtimerWatch.elapsed(TimeUnit.MILLISECONDS);
+long elapsed = killtimerWatch.elapsedMillis();
 LOG.info("Time to die for task {}", elapsed);
   }
-  metrics.incrPreemptionTimeLost(runtimeWatch.elapsed(TimeUnit.MILLISECONDS));
+  metrics.incrPreemptionTimeLost(runtimeWatch.elapsedMillis());
   metrics.incrExecutorTotalKilled();
   break;
 case COMMUNICATION_FAILURE:
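
The underlying issue is a Guava API skew: Stopwatch.elapsedMillis() exists in older Guava
releases, while elapsed(TimeUnit) is its newer replacement, so a call written against one
Guava version fails against the other on the cluster classpath. A self-contained Java sketch
of the two idioms this patch and its revert toggle between (the version boundary is stated
from general Guava history, not from this thread):

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

public class StopwatchIdioms {
  public static void main(String[] args) throws InterruptedException {
    // createStarted() itself is newer-Guava; older code wrote new Stopwatch().start().
    Stopwatch watch = Stopwatch.createStarted();
    Thread.sleep(25);
    watch.stop();
    long ms = watch.elapsed(TimeUnit.MILLISECONDS); // newer-Guava style
    // long ms = watch.elapsedMillis();             // older-Guava style
    System.out.println("elapsed = " + ms + " ms");
  }
}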



[8/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

2015-05-15 Thread gunther
HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on 
LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d421201c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d421201c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d421201c

Branch: refs/heads/branch-1.2
Commit: d421201cc9b495e6a7f266d42ff7cfb93483b957
Parents: 878a282
Author: Gunther Hagleitner 
Authored: Thu May 14 15:42:04 2015 -0700
Committer: Gunther Hagleitner 
Committed: Fri May 15 14:35:35 2015 -0700

--
 .../test/resources/testconfiguration.properties |   10 +
 .../ql/exec/vector/VectorizedBatchUtil.java |5 +-
 .../mapjoin/VectorMapJoinCommonOperator.java|8 +-
 .../VectorMapJoinGenerateResultOperator.java|   47 +-
 ...pJoinInnerBigOnlyGenerateResultOperator.java |   53 +-
 .../VectorMapJoinInnerBigOnlyLongOperator.java  |   15 +-
 ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java |   15 +-
 ...VectorMapJoinInnerBigOnlyStringOperator.java |   12 +-
 ...ectorMapJoinInnerGenerateResultOperator.java |   39 +-
 .../mapjoin/VectorMapJoinInnerLongOperator.java |   17 +-
 .../VectorMapJoinInnerMultiKeyOperator.java |   19 +-
 .../VectorMapJoinInnerStringOperator.java   |   17 +-
 ...orMapJoinLeftSemiGenerateResultOperator.java |   40 +-
 .../VectorMapJoinLeftSemiLongOperator.java  |   13 +-
 .../VectorMapJoinLeftSemiMultiKeyOperator.java  |   17 +-
 .../VectorMapJoinLeftSemiStringOperator.java|   17 +-
 ...ectorMapJoinOuterGenerateResultOperator.java |  805 ---
 .../mapjoin/VectorMapJoinOuterLongOperator.java |  189 +-
 .../VectorMapJoinOuterMultiKeyOperator.java |  184 +-
 .../VectorMapJoinOuterStringOperator.java   |  185 +-
 .../mapjoin/VectorMapJoinRowBytesContainer.java |2 +-
 .../fast/VectorMapJoinFastBytesHashMap.java |8 +-
 .../VectorMapJoinFastBytesHashMultiSet.java |4 +-
 .../fast/VectorMapJoinFastBytesHashTable.java   |   10 +-
 .../mapjoin/fast/VectorMapJoinFastKeyStore.java |   10 +-
 .../fast/VectorMapJoinFastLongHashMap.java  |2 +-
 .../fast/VectorMapJoinFastLongHashTable.java|   18 +-
 .../fast/VectorMapJoinFastTableContainer.java   |2 +-
 .../fast/VectorMapJoinFastValueStore.java   |8 +-
 .../VectorMapJoinOptimizedLongCommon.java   |4 +-
 .../hive/ql/optimizer/physical/Vectorizer.java  |   24 +-
 .../test/queries/clientpositive/vector_join30.q |  160 ++
 .../clientpositive/vector_join_filters.q|   38 +
 .../queries/clientpositive/vector_join_nulls.q  |   33 +
 .../clientpositive/vector_left_outer_join2.q|2 +
 .../queries/clientpositive/vector_outer_join5.q |  173 ++
 .../tez/acid_vectorization_partition.q.out  |   20 +-
 .../clientpositive/tez/vector_join30.q.out  | 1367 +++
 .../tez/vector_join_filters.q.out   |  222 ++
 .../clientpositive/tez/vector_join_nulls.q.out  |  195 ++
 .../tez/vector_left_outer_join2.q.out   |   20 +-
 .../tez/vector_left_outer_join3.q.out   |  222 ++
 .../clientpositive/tez/vector_outer_join5.q.out | 1328 +++
 .../tez/vectorized_timestamp_ints_casts.q.out   |  234 ++
 .../results/clientpositive/vector_join30.q.out  | 2194 ++
 .../clientpositive/vector_join_filters.q.out|  222 ++
 .../clientpositive/vector_join_nulls.q.out  |  195 ++
 .../vector_left_outer_join2.q.out   |8 +-
 .../clientpositive/vector_outer_join5.q.out | 1300 +++
 49 files changed, 8936 insertions(+), 796 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index eeb46cc..0f2a831 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -220,8 +220,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_groupby_3.q,\
   vector_groupby_reduce.q,\
   vector_if_expr.q,\
+  vector_inner_join.q,\
   vector_interval_1.q,\
   vector_interval_2.q,\
+  vector_join30.q,\
+  vector_join_filters.q,\
+  vector_join_nulls.q,\
   vector_left_outer_join.q,\
   vector_left_outer_join2.q,\
   vector_leftsemi_mapjoin.q,\
@@ -230,6 +234,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_multi_insert.q,\
   vector_non_string_partition.q,\
   vector_orderby_5.q,\
+  vector_outer_join0.q,\
+  vector_outer_join1.q,\
+  vector_outer_join2.q,\
+  vector_outer_join3.q,\
+  vector_outer_join4.q,\
+  vector_outer_join5.q,\
   vector_partition_diff_num_cols.q,\
   vector_partiti

[2/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

2015-05-15 Thread gunther
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/results/clientpositive/vector_join_filters.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_join_filters.q.out b/ql/src/test/results/clientpositive/vector_join_filters.q.out
new file mode 100644
index 000..48fc072
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join_filters.q.out
@@ -0,0 +1,222 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE 
myinput1_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE 
myinput1_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM 
myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM 
myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross 
product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a 
JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 
40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 
a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key 
> 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+3078400
+Warning: Map Join MAPJOIN[17][bigTable=a] in task 'Stage-2:MAPRED' is a cross 
product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a 
LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value 
AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 
a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value 
AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+4937935
+Warning: Map Join MAPJOIN[17][bigTable=b] in task 'Stage-2:MAPRED' is a cross 
product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a 
RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value 
AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 
a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUE

[3/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

2015-05-15 Thread gunther
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/results/clientpositive/vector_join30.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_join30.q.out b/ql/src/test/results/clientpositive/vector_join30.q.out
new file mode 100644
index 000..57f9aeb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join30.q.out
@@ -0,0 +1,2194 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcsrc
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcsrc
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, 
Stage-2
+  Stage-8 has a backup stage: Stage-2
+  Stage-5 depends on stages: Stage-8
+  Stage-3 depends on stages: Stage-2, Stage-5, Stage-6
+  Stage-9 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-9
+  Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: orcsrc
+Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+Filter Operator
+  predicate: key is not null (type: boolean)
+  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 250 Data size: 44000 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Statistics: Num rows: 250 Data size: 44000 Basic stats: 
COMPLETE Column stats: NONE
+  Execution mode: vectorized
+  Reduce Operator Tree:
+Select Operator
+  expressions: KEY.reducesinkkey0 (type: string)
+  outputColumnNames: _col0
+  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE 
Column stats: NONE
+  File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-7
+Conditional Operator
+
+  Stage: Stage-8
+Map Reduce Local Work
+  Alias -> Map Local Tables:
+$INTNAME1 
+  Fetch Operator
+limit: -1
+  Alias -> Map Local Operator Tree:
+$INTNAME1 
+  TableScan
+HashTable Sink Operator
+  keys:
+0 _col0 (type: string)
+1 _col0 (type: string)
+
+  Stage: Stage-5
+Map Reduce
+  Map Operator Tree:
+  TableScan
+Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 _col0 (type: string)
+1 _col0 (type: string)
+  outputColumnNames: _col2, _col3
+  Group By Operator
+aggregations: sum(hash(_col2,_col3))
+mode: hash
+outputColumnNames: _col0
+File Output Operator
+  compressed: false
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+  Local Work:
+Map Reduce Local Work
+
+  Stage: Stage-3
+Map Reduce
+  Map Operator Tree:
+  TableScan
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+  value expressions: _col0 (type: bigint)
+  Reduce Operator Tree:
+Group By Operator
+  aggregations: sum(VALUE._col0)
+  mode: mergepartial
+  outputC

[6/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

2015-05-15 Thread gunther
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/queries/clientpositive/vector_outer_join5.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join5.q b/ql/src/test/queries/clientpositive/vector_outer_join5.q
new file mode 100644
index 000..b7ee4a4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_outer_join5.q
@@ -0,0 +1,173 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=1;
+
+-- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint;
+
+ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS;
+ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS;
+
+create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100;
+
+ANALYZE TABLE small_table COMPUTE STATISTICS;
+ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS;
+
+explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1;
+
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1;
+
+explain
+select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1;
+
+select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1;
+
+
+create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null;
+
+ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS;
+ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS;
+
+create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100;
+
+ANALYZE TABLE small_table2 COMPUTE STATISTICS;
+ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS;
+
+explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1;
+
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmod

[1/8] hive git commit: HIVE-10559: IndexOutOfBoundsException with RemoveDynamicPruningBySize (Wei Zhang via Gunther Hagleitner)

2015-05-15 Thread gunther
Repository: hive
Updated Branches:
  refs/heads/branch-1.2 7f237de44 -> d421201cc


HIVE-10559: IndexOutOfBoundsException with RemoveDynamicPruningBySize (Wei 
Zhang via Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/878a282d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/878a282d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/878a282d

Branch: refs/heads/branch-1.2
Commit: 878a282d5c51456e83f90df3044d900fe41af226
Parents: 7f237de
Author: Gunther Hagleitner 
Authored: Thu May 14 15:48:34 2015 -0700
Committer: Gunther Hagleitner 
Committed: Fri May 15 14:35:12 2015 -0700

--
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |  25 ++--
 .../optimizer/RemoveDynamicPruningBySize.java   |  19 ++-
 .../hadoop/hive/ql/parse/GenTezUtils.java   |  16 +++
 .../hive/ql/parse/OptimizeTezProcContext.java   |  14 +++
 .../hadoop/hive/ql/parse/TezCompiler.java   |  19 ++-
 .../dynamic_partition_pruning_2.q   |  17 +++
 .../tez/dynamic_partition_pruning_2.q.out   | 116 +++
 7 files changed, 196 insertions(+), 30 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/878a282d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index d42b643..bcffdbc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -636,13 +636,26 @@ public class ConvertJoinMapJoin implements NodeProcessor {
 // we might have generated a dynamic partition operator chain. Since
 // we're removing the reduce sink we need do remove that too.
 Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>();
+Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>();
 for (Operator<?> c : p.getChildOperators()) {
-  if (hasDynamicPartitionBroadcast(c)) {
+  AppMasterEventOperator event = findDynamicPartitionBroadcast(c);
+  if (event != null) {
 dynamicPartitionOperators.add(c);
+opEventPairs.put(c, event);
   }
 }
 for (Operator<?> c : dynamicPartitionOperators) {
-  p.removeChild(c);
+  if (context.pruningOpsRemovedByPriorOpt.isEmpty() ||
+  !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) {
+p.removeChild(c);
+// at this point we've found the fork in the op pipeline that has the pruning as a child plan.
+LOG.info("Disabling dynamic pruning for: "
++ ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName()
++ ". Need to be removed together with reduce sink");
+  }
+}
+for (Operator<?> op : dynamicPartitionOperators) {
+  context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op));
 }
   }
   mapJoinOp.getParentOperators().remove(bigTablePosition);
@@ -662,15 +675,13 @@ public class ConvertJoinMapJoin implements NodeProcessor {
 return mapJoinOp;
   }
 
-  private boolean hasDynamicPartitionBroadcast(Operator<?> parent) {
-boolean hasDynamicPartitionPruning = false;
+  private AppMasterEventOperator findDynamicPartitionBroadcast(Operator<?> parent) {
 
 for (Operator<?> op : parent.getChildOperators()) {
   while (op != null) {
 if (op instanceof AppMasterEventOperator && op.getConf() instanceof DynamicPruningEventDesc) {
   // found dynamic partition pruning operator
-  hasDynamicPartitionPruning = true;
-  break;
+  return (AppMasterEventOperator)op;
 }
 if (op instanceof ReduceSinkOperator || op instanceof FileSinkOperator) {
   // crossing reduce sink or file sink means the pruning isn't for this parent.
@@ -686,6 +697,6 @@ public class ConvertJoinMapJoin implements NodeProcessor {
   }
 }
 
-return hasDynamicPartitionPruning;
+return null;
   }
 }
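
The shape of the fix above: the helper now returns the pruning event operator it finds (or
null) rather than a boolean, so the caller can pair each child with its event and skip ops
a prior pass already removed. A toy, self-contained Java illustration of that
find-versus-has refactor (names invented, not Hive's):

import java.util.Arrays;
import java.util.List;

public class FindVersusHas {
  // Returning the matching element (or null) lets the caller act on the
  // match, which a boolean-returning "has" method cannot support.
  static String findFirstStartingWith(List<String> ops, String prefix) {
    for (String op : ops) {
      if (op.startsWith(prefix)) {
        return op;
      }
    }
    return null;
  }

  public static void main(String[] args) {
    List<String> ops = Arrays.asList("ReduceSink", "AppMasterEvent#1", "FileSink");
    String event = findFirstStartingWith(ops, "AppMasterEvent");
    if (event != null) {
      System.out.println("found pruning event: " + event);
    }
  }
}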

http://git-wip-us.apache.org/repos/asf/hive/blob/878a282d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java
index 4803959..5d01311 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RemoveDynamicPruningBySize.java
@@ -24,1

[4/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

2015-05-15 Thread gunther
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out b/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
new file mode 100644
index 000..1e74446
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
@@ -0,0 +1,234 @@
+PREHOOK: query: explain
+select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+TableScan
+  alias: alltypesorc
+  Filter Operator
+predicate: ((cbigint % 250) = 0) (type: boolean)
+Select Operator
+  expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), 
CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) 
(type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat 
AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), 
CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS 
TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS 
TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: 
timestamp)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
+  ListSink
+
+PREHOOK: query: select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+ A masked pattern was here 
+POSTHOOK: query: select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+ A masked pattern was here 
+1969-12-31 15:59:59.9641969-12-31 15:59:59.8   NULL1969-12-08 
10:43:03.25  1969-12-31 15:59:24 1969-12-31 15:56:40 NULL1969-12-31 
16:00:00 1969-12-31 15:59:45.748 NULLNULL
+1969-12-31 15:59:59.9641969-12-31 15:59:59.8   NULL1970-01-19 
04:24:39 1969-12-31 15:59:24 1969-12-31 15:56:40 NULL1969-12-31 
16:00:00 1969-12-31 15:59:53.817 NULLNULL
+1969-12-31 15:59:59.97 1969-12-31 15:59:59.8   NULL1970-01-17 05:10:52.25  
1969-12-31 15:59:30 1969-12-31 15:56:40 NULL1969-12-31 16:00:00 
1969-12-31 16:00:12.935 NULLNULL
+1969-12-31 15:59:59.949NULL1970-01-09 14:53:20.971 1970-01-12 
20:45:23.25  1969-12-31 15:59:09 NULL1969-12-31 16:00:00 1969-12-31 
16:00:00 1969-12-31 16:00:08.451 NULLNULL
+1969-12-31 15:59:59.949NULL1970-01-09 07:39:13.882 1969-12-09 
07:45:32.75  1969-12-31 15:59:09 NULL1969-12-31 16:00:00.001 1969-12-31 
16:00:00 1969-12-31 16:00:08.451 NULLNULL
+1969-12-31 16:00:00.02 1969-12-31 16:00:15.601 NULL1969-12-27 11:19:26.75  
1969-12-31 16:00:20 1969-12-31

[5/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

2015-05-15 Thread gunther
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
new file mode 100644
index 000..2243072
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE 
myinput1_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE 
myinput1_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM 
myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM 
myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+13630578
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
LEFT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
LEFT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+13630578
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
RIGHT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
RIGHT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+13630578
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+4509856
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+ A masked pattern was here 
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+ A masked pattern was here 
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a 
LEFT OUTE

[7/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

2015-05-15 Thread gunther
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
index 37ccf22..f971727 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
@@ -24,7 +24,9 @@ import java.util.Arrays;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -123,13 +125,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateResultOperator
 
   batchCounter++;
 
-  // Do the per-batch setup for an outer join.
-
-  outerPerBatchSetup(batch);
-
-  // For outer join, DO NOT apply filters yet.  It is incorrect for outer join to
-  // apply the filter before hash table matching.
-
   final int inputLogicalSize = batch.size;
 
   if (inputLogicalSize == 0) {
@@ -139,6 +134,44 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateResultOperator
 return;
   }
 
+  // Do the per-batch setup for an outer join.
+
+  outerPerBatchSetup(batch);
+
+  // For outer join, remember our input rows before ON expression filtering or before
+  // hash table matching so we can generate results for all rows (matching and non matching)
+  // later.
+  boolean inputSelectedInUse = batch.selectedInUse;
+  if (inputSelectedInUse) {
+// if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+//   throw new HiveException("batch.selected is not in sort order and unique");
+// }
+System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+  }
+
+  // Filtering for outer join just removes rows available for hash table matching.
+  boolean someRowsFilteredOut =  false;
+  if (bigTableFilterExpressions.length > 0) {
+// Since the input
+for (VectorExpression ve : bigTableFilterExpressions) {
+  ve.evaluate(batch);
+}
+someRowsFilteredOut = (batch.size != inputLogicalSize);
+if (LOG.isDebugEnabled()) {
+  if (batch.selectedInUse) {
+if (inputSelectedInUse) {
+  LOG.debug(CLASS_NAME +
+  " inputSelected " + intArrayToRangesString(inputSelected, 
inputLogicalSize) +
+  " filtered batch.selected " + 
intArrayToRangesString(batch.selected, batch.size));
+} else {
+  LOG.debug(CLASS_NAME +
+" inputLogicalSize " + inputLogicalSize +
+" filtered batch.selected " + 
intArrayToRangesString(batch.selected, batch.size));
+}
+  }
+}
+  }
+
   // Perform any key expressions.  Results will go into scratch columns.
   if (bigTableKeyExpressions != null) {
 for (VectorExpression ve : bigTableKeyExpressions) {
@@ -146,9 +179,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateResultOperator
 }
   }
 
-  // We rebuild in-place the selected array with rows destine to be forwarded.
-  int numSel = 0;
-
   /*
* Single-Column Long specific declarations.
*/
@@ -178,12 +208,16 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateResultOperator
  */
 
 JoinUtil.JoinResult joinResult;
-if (!joinColVector.noNulls && joinColVector.isNull[0]) {
-  // Null key is no match for whole batch.
+if (batch.size == 0) {
+  // Whole repeated key batch was filtered out.
+  joinResult = JoinUtil.JoinResult.NOMATCH;
+} else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+  // Any (repeated) null key column is no match for whole batch.
   joinResult = JoinUtil.JoinResult.NOMATCH;
 } else {
   // Handle *repeated* join key, if found.
   long key = vector[0];
+  // LOG.debug(CLASS_NAME + " repeated key " + key);
   if (useMinMax && (key < min || key > max)) {
 // Out of range for whole batch.
 joinResult = JoinUtil.JoinResult.NOMATCH;
@@ -199,7 +233,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateResultOperator
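The core of the HIVE-10565 fix is visible in the hunks above: the per-batch setup and the saved copy of batch.selected now happen before the ON-clause filter expressions run, so the outer join can still emit NULL-extended results for rows the filter removed from hash table matching. The following minimal, self-contained sketch shows that save-then-filter pattern; Batch and applyFilter are illustrative stand-ins, not Hive's vectorization classes.

import java.util.Arrays;

public class OuterJoinFilterSketch {

  static class Batch {
    int size;                 // logical row count
    int[] selected;           // indices of live rows when selectedInUse
    boolean selectedInUse;
  }

  // Stand-in for evaluating ON-clause filter expressions: keeps even rows only.
  static void applyFilter(Batch batch) {
    int newSize = 0;
    for (int i = 0; i < batch.size; i++) {
      int row = batch.selectedInUse ? batch.selected[i] : i;
      if (row % 2 == 0) {
        batch.selected[newSize++] = row;
      }
    }
    batch.size = newSize;
    batch.selectedInUse = true;
  }

  public static void main(String[] args) {
    Batch batch = new Batch();
    batch.size = 5;
    batch.selected = new int[] {0, 1, 2, 3, 4};
    batch.selectedInUse = true;

    // Remember the input rows before filtering (mirrors the inputSelected copy).
    int inputLogicalSize = batch.size;
    int[] inputSelected = Arrays.copyOf(batch.selected, inputLogicalSize);

    applyFilter(batch);
    boolean someRowsFilteredOut = (batch.size != inputLogicalSize);

    // Matching rows come from batch.selected; the saved inputSelected lets the
    // outer join later emit NULL-extended results for the filtered-out rows.
    System.out.println("matched rows: " + Arrays.toString(Arrays.copyOf(batch.selected, batch.size)));
    System.out.println("all input rows: " + Arrays.toString(inputSelected));
    System.out.println("someRowsFilteredOut = " + someRowsFilteredOut);
  }
}

Running it prints matched rows [0, 2, 4] while inputSelected still holds all five input rows, which is exactly the state the operator needs in order to generate the non-matching side later.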

hive git commit: HIVE-10714: Bloom filter column names specification should be case insensitive (Prasanth Jayachandran reviewed by Gopal V)

2015-05-15 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/master f82c0c20c -> 3056e7aa8


HIVE-10714: Bloom filter column names specification should be case insensitive (Prasanth Jayachandran reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3056e7aa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3056e7aa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3056e7aa

Branch: refs/heads/master
Commit: 3056e7aa856a6b591cf5c19743579f0991a477af
Parents: f82c0c2
Author: Prasanth Jayachandran 
Authored: Fri May 15 11:22:51 2015 -0700
Committer: Prasanth Jayachandran 
Committed: Fri May 15 11:22:51 2015 -0700

--
 ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java | 2 +-
 ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/3056e7aa/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
index ba59b35..0776018 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
@@ -58,7 +58,7 @@ public class OrcUtils {
 int numFlattenedCols = getFlattenedColumnsCount(inspector);
 boolean[] results = new boolean[numFlattenedCols];
 if (selectedColumns != null && !selectedColumns.isEmpty()) {
-  includeColumnsImpl(results, selectedColumns, allColumns, inspector);
+  includeColumnsImpl(results, selectedColumns.toLowerCase(), allColumns, inspector);
 }
 return results;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/3056e7aa/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
index b56c1ca..5b200f1 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
@@ -316,7 +316,7 @@ public class TestFileDump {
 .compress(CompressionKind.ZLIB)
 .bufferSize(1)
 .rowIndexStride(1000)
-.bloomFilterColumns("s");
+.bloomFilterColumns("S");
 Writer writer = OrcFile.createWriter(testFilePath, options);
 Random r1 = new Random(1);
 String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
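The patch itself is the single toLowerCase() call above: Hive stores column names lower-cased, so the user-supplied bloom filter column list has to be normalized before matching against the schema, which is what the test's upper-case "S" now exercises. A standalone sketch of the same normalization idea (ColumnCaseSketch and includeColumns are illustrative names, not the ORC code path):

import java.util.Arrays;
import java.util.List;

public class ColumnCaseSketch {

  static boolean[] includeColumns(String selectedColumns, List<String> allColumns) {
    boolean[] include = new boolean[allColumns.size()];
    // Lower-case the spec once, mirroring selectedColumns.toLowerCase() in the patch.
    for (String col : selectedColumns.toLowerCase().split(",")) {
      int idx = allColumns.indexOf(col.trim());
      if (idx >= 0) {
        include[idx] = true;
      }
    }
    return include;
  }

  public static void main(String[] args) {
    List<String> schema = Arrays.asList("i", "l", "s", "t", "dec");
    // "S" now matches column "s" regardless of the case the user typed.
    System.out.println(Arrays.toString(includeColumns("S,DEC", schema)));
    // prints [false, false, true, false, true]
  }
}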



hive git commit: HIVE-10641 create CRC32 UDF (Alexander Pivovarov, reviewed by Jason Dere)

2015-05-15 Thread apivovarov
Repository: hive
Updated Branches:
  refs/heads/master 84a2235c5 -> f82c0c20c


HIVE-10641 create CRC32 UDF (Alexander Pivovarov, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f82c0c20
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f82c0c20
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f82c0c20

Branch: refs/heads/master
Commit: f82c0c20c5038f54b4a24561c943ca646a272d18
Parents: 84a2235
Author: Alexander Pivovarov 
Authored: Wed May 6 23:14:29 2015 -0700
Committer: Alexander Pivovarov 
Committed: Fri May 15 10:27:26 2015 -0700

--
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |  2 +
 .../org/apache/hadoop/hive/ql/udf/UDFCrc32.java | 75 
 .../apache/hadoop/hive/ql/udf/TestUDFCrc32.java | 74 +++
 ql/src/test/queries/clientpositive/udf_crc32.q  | 13 
 .../results/clientpositive/show_functions.q.out |  2 +
 .../test/results/clientpositive/udf_crc32.q.out | 60 
 6 files changed, 226 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 606185c..7ce0a1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.udf.UDFBase64;
 import org.apache.hadoop.hive.ql.udf.UDFBin;
 import org.apache.hadoop.hive.ql.udf.UDFConv;
 import org.apache.hadoop.hive.ql.udf.UDFCos;
+import org.apache.hadoop.hive.ql.udf.UDFCrc32;
 import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
 import org.apache.hadoop.hive.ql.udf.UDFDegrees;
 import org.apache.hadoop.hive.ql.udf.UDFE;
@@ -219,6 +220,7 @@ public final class FunctionRegistry {
 system.registerUDF("tan", UDFTan.class, false);
 system.registerUDF("e", UDFE.class, false);
 system.registerGenericUDF("factorial", GenericUDFFactorial.class);
+system.registerUDF("crc32", UDFCrc32.class, false);
 
 system.registerUDF("conv", UDFConv.class, false);
 system.registerUDF("bin", UDFBin.class, false);

http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
new file mode 100644
index 0000000..c1f0e38
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.util.zip.CRC32;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * UDFCrc32.
+ *
+ */
+@Description(name = "crc32",
+value = "_FUNC_(str or bin) - Computes a cyclic redundancy check value "
++ "for string or binary argument and returns bigint value.",
+extended = "Example:\n"
++ "  > SELECT _FUNC_('ABC');\n"
++ "  2743272264\n"
++ "  > SELECT _FUNC_(binary('ABC'));\n"
++ "  2743272264")
+public class UDFCrc32 extends UDF {
+
+  private final LongWritable result = new LongWritable();
+  private final CRC32 crc32 = new CRC32();
+
+  /**
+   * CRC32 for string
+   */
+  public LongWritable evaluate(Text n) {
+if (n == null) {
+  return null;
+}
+
+crc32.reset();
+crc32.update(n.getBytes(), 0, n.getLength());
+
+result.set(crc32.getValue());
+return result;
+  }
+
+  /**
+   * CRC32 for binary
+   */
+  public LongWritable evaluate(BytesWritable b) {
+if (b == null) {
+  return null;
+

hive git commit: HIVE-10639 create SHA1 UDF (Alexander Pivovarov, reviewed by Jason Dere)

2015-05-15 Thread apivovarov
Repository: hive
Updated Branches:
  refs/heads/master bf7810ab6 -> 84a2235c5


HIVE-10639 create SHA1 UDF (Alexander Pivovarov, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/84a2235c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/84a2235c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/84a2235c

Branch: refs/heads/master
Commit: 84a2235c5590944120bb9319565f5e113b6275a1
Parents: bf7810a
Author: Alexander Pivovarov 
Authored: Wed May 6 20:13:22 2015 -0700
Committer: Alexander Pivovarov 
Committed: Fri May 15 10:23:27 2015 -0700

--
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |  3 +
 .../org/apache/hadoop/hive/ql/udf/UDFSha1.java  | 88 
 .../apache/hadoop/hive/ql/udf/TestUDFSha1.java  | 57 +
 ql/src/test/queries/clientpositive/udf_sha1.q   | 13 +++
 .../results/clientpositive/show_functions.q.out |  2 +
 .../test/results/clientpositive/udf_sha1.q.out  | 61 ++
 6 files changed, 224 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 02a604f..606185c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -86,6 +86,7 @@ import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace;
 import org.apache.hadoop.hive.ql.udf.UDFRepeat;
 import org.apache.hadoop.hive.ql.udf.UDFReverse;
 import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFSha1;
 import org.apache.hadoop.hive.ql.udf.UDFSign;
 import org.apache.hadoop.hive.ql.udf.UDFSin;
 import org.apache.hadoop.hive.ql.udf.UDFSpace;
@@ -226,6 +227,8 @@ public final class FunctionRegistry {
 system.registerUDF("base64", UDFBase64.class, false);
 system.registerUDF("unbase64", UDFUnbase64.class, false);
 system.registerUDF("md5", UDFMd5.class, false);
+system.registerUDF("sha1", UDFSha1.class, false);
+system.registerUDF("sha", UDFSha1.class, false);
 
 system.registerGenericUDF("encode", GenericUDFEncode.class);
 system.registerGenericUDF("decode", GenericUDFDecode.class);

http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java
new file mode 100644
index 0000000..04e6f81
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * UDFSha.
+ *
+ */
+@Description(name = "sha1,sha",
+value = "_FUNC_(str or bin) - Calculates the SHA-1 digest for string or 
binary "
++ "and returns the value as a hex string.",
+extended = "Example:\n"
++ "  > SELECT _FUNC_('ABC');\n"
++ "  '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'\n"
++ "  > SELECT _FUNC_(binary('ABC'));\n"
++ "  '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'")
+public class UDFSha1 extends UDF {
+
+  private final Text result = new Text();
+  private final MessageDigest digest;
+
+  public UDFSha1() {
+try {
+  digest = MessageDigest.getInstance("SHA");
+} catch (NoSuchAlgorithmException e) {
+  throw new RuntimeException(e);
+}
+  }
+
+  /**
+   * Convert String to SHA-1
+   */
+  public Text evaluate(Text n) {
+if (n == null) {
+  return null;
+}
+
+digest.reset();
+digest.update(n.getBytes(), 0, n.getLength());
+byte[] shaBytes = digest.digest();
+String shaHex = Hex.encodeHexString(shaBytes);
+
+result.set(shaHex);
+return result;
+  }
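As with CRC32 above, the UDF wraps straight JDK machinery: MessageDigest.getInstance("SHA") is the JDK's alias for SHA-1, and Hex.encodeHexString is the commons-codec helper the UDF uses. A standalone check, with the expected output taken from the UDF's own documentation:

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

import org.apache.commons.codec.binary.Hex;

public class Sha1Check {
  public static void main(String[] args) throws Exception {
    MessageDigest digest = MessageDigest.getInstance("SHA");
    byte[] shaBytes = digest.digest("ABC".getBytes(StandardCharsets.UTF_8));
    // Matches the documented SELECT sha1('ABC') result.
    System.out.println(Hex.encodeHexString(shaBytes));
    // 3c01bdbb26f358bab27f267924aa2c9a03fcfdb8
  }
}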

hive git commit: HIVE-10657 Remove copyBytes operation from MD5 UDF (Alexander Pivovarov, reviewed by Jason Dere)

2015-05-15 Thread apivovarov
Repository: hive
Updated Branches:
  refs/heads/master ad62e2ede -> bf7810ab6


HIVE-10657 Remove copyBytes operation from MD5 UDF (Alexander Pivovarov, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bf7810ab
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bf7810ab
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bf7810ab

Branch: refs/heads/master
Commit: bf7810ab64caabc47d4370d04f80c0c1f182426a
Parents: ad62e2e
Author: Alexander Pivovarov 
Authored: Fri May 8 11:36:47 2015 -0700
Committer: Alexander Pivovarov 
Committed: Fri May 15 10:20:32 2015 -0700

--
 .../org/apache/hadoop/hive/ql/udf/UDFMd5.java   | 33 +---
 1 file changed, 21 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/bf7810ab/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
index 62c16c2..14928ef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
@@ -18,7 +18,10 @@
 
 package org.apache.hadoop.hive.ql.udf;
 
-import org.apache.commons.codec.digest.DigestUtils;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+import org.apache.commons.codec.binary.Hex;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
@@ -39,6 +42,15 @@ import org.apache.hadoop.io.Text;
 public class UDFMd5 extends UDF {
 
   private final Text result = new Text();
+  private final MessageDigest digest;
+
+  public UDFMd5() {
+try {
+  digest = MessageDigest.getInstance("MD5");
+} catch (NoSuchAlgorithmException e) {
+  throw new RuntimeException(e);
+}
+  }
 
   /**
* Convert String to md5
@@ -48,8 +60,10 @@ public class UDFMd5 extends UDF {
   return null;
 }
 
-String str = n.toString();
-String md5Hex = DigestUtils.md5Hex(str);
+digest.reset();
+digest.update(n.getBytes(), 0, n.getLength());
+byte[] md5Bytes = digest.digest();
+String md5Hex = Hex.encodeHexString(md5Bytes);
 
 result.set(md5Hex);
 return result;
@@ -63,17 +77,12 @@ public class UDFMd5 extends UDF {
   return null;
 }
 
-byte[] bytes = copyBytes(b);
-String md5Hex = DigestUtils.md5Hex(bytes);
+digest.reset();
+digest.update(b.getBytes(), 0, b.getLength());
+byte[] md5Bytes = digest.digest();
+String md5Hex = Hex.encodeHexString(md5Bytes);
 
 result.set(md5Hex);
 return result;
   }
-
-  protected byte[] copyBytes(BytesWritable b) {
-int size = b.getLength();
-byte[] result = new byte[size];
-System.arraycopy(b.getBytes(), 0, result, 0, size);
-return result;
-  }
 }
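The copy being removed existed for a real reason: BytesWritable.getBytes() returns the backing buffer, which can be longer than the valid data, so digesting it wholesale would hash garbage padding. Passing (buffer, 0, getLength()) to MessageDigest.update() bounds the read with no intermediate allocation, which is the whole optimization. A minimal standalone illustration of that bounded-update pattern (not the committed code):

import java.security.MessageDigest;

import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.io.BytesWritable;

public class Md5NoCopy {
  public static void main(String[] args) throws Exception {
    BytesWritable b = new BytesWritable("ABC".getBytes("UTF-8"));
    b.setCapacity(64);  // backing buffer is now larger than the 3 valid bytes

    MessageDigest md5 = MessageDigest.getInstance("MD5");
    // Bounded update: reads only the valid region, no intermediate byte[] copy.
    md5.update(b.getBytes(), 0, b.getLength());
    System.out.println(Hex.encodeHexString(md5.digest()));
  }
}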