hive git commit: HIVE-17131: Add InterfaceAudience and InterfaceStability annotations for SerDe APIs (Sahil Takiar, reviewed by Ashutosh Chauhan)

2017-07-25 Thread stakiar
Repository: hive
Updated Branches:
  refs/heads/branch-2 726f270a6 -> a9e5427e5


HIVE-17131: Add InterfaceAudience and InterfaceStability annotations for SerDe 
APIs (Sahil Takiar, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a9e5427e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a9e5427e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a9e5427e

Branch: refs/heads/branch-2
Commit: a9e5427e5185e47ce2dc28130e475a7037406463
Parents: 726f270
Author: Sahil Takiar 
Authored: Tue Jul 25 21:19:24 2017 -0700
Committer: Sahil Takiar 
Committed: Tue Jul 25 21:21:06 2017 -0700

--
 .../org/apache/hadoop/hive/serde2/AbstractDeserializer.java| 4 
 .../src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java  | 4 
 .../java/org/apache/hadoop/hive/serde2/AbstractSerializer.java | 4 
 serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java | 4 
 .../src/java/org/apache/hadoop/hive/serde2/SerDeException.java | 6 +-
 serde/src/java/org/apache/hadoop/hive/serde2/SerDeStats.java   | 5 +
 serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java   | 4 
 .../hadoop/hive/serde2/objectinspector/ObjectInspector.java| 5 +
 8 files changed, 35 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a9e5427e/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java
--
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java
index 869b86b..e850406 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.hive.serde2;
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hadoop.hive.common.classification.InterfaceStability;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.io.Writable;
 
@@ -29,6 +31,8 @@ import org.apache.hadoop.io.Writable;
  * new methods can be added in the underlying interface, Deserializer, and only implementations
  * that need those methods overwrite it.
  */
+@InterfaceAudience.Public
+@InterfaceStability.Stable
 public abstract class AbstractDeserializer implements Deserializer {
 
   /**

http://git-wip-us.apache.org/repos/asf/hive/blob/a9e5427e/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
--
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
index 049b35d..939d90b 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
@@ -22,6 +22,8 @@ import java.util.Map;
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hadoop.hive.common.classification.InterfaceStability;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.io.Writable;
 
@@ -32,6 +34,8 @@ import javax.annotation.Nullable;
  * new methods can be added in the underlying interface, SerDe, and only implementations
  * that need those methods overwrite it.
  */
+@InterfaceAudience.Public
+@InterfaceStability.Stable
 public abstract class AbstractSerDe implements Deserializer, Serializer {
 
   protected String configErrors;

http://git-wip-us.apache.org/repos/asf/hive/blob/a9e5427e/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java
--
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java
index 570b4bb..8b30056 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.hive.serde2;
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hadoop.hive.common.classification.InterfaceStability;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.io.Writable;
 
@@ -29,6 +31,8 @@ import org.apache.hadoop.io.Writable;
  * new methods can be added in 

[04/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
--
diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out 
b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
index 5593e42..559f05e 100644
--- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
+++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
@@ -14,10 +14,31 @@ PREHOOK: Input: default@src
 POSTHOOK: query: describe formatted src key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-keystring  
0   309 2.812   
3   
from deserializer   
-COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}

+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+keystring  
0   309 2.812   
3   
SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/
 
+wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF   

 
+wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+   

 
+mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE   

 
+CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H   

 
+ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+   

 
+iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9   

 
+BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD   

 
+w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS   

 
+CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK   

 
+wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/   

 
+yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J   

 
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8   

 
+3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32   

 
+CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB   

 
+2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC   

 
++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC   

 
+z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr   

 
+A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwA

[11/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f8b79fe6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f8b79fe6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f8b79fe6

Branch: refs/heads/master
Commit: f8b79fe6d136f348820ce81dc7a6883f1e70dcfc
Parents: 892841a
Author: Pengcheng Xiong 
Authored: Tue Jul 25 15:41:14 2017 -0700
Committer: Pengcheng Xiong 
Committed: Tue Jul 25 15:41:14 2017 -0700

--
 .../apache/hadoop/hive/common/ndv/FMSketch.java | 117 +--
 .../ndv/NumDistinctValueEstimatorFactory.java   |  30 +-
 .../hive/common/ndv/fm/FMSketchUtils.java   | 133 
 .../hadoop/hive/common/ndv/hll/HyperLogLog.java |   4 +-
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   4 +-
 .../ndv/fm/TestFMSketchSerialization.java   |  97 +++
 data/conf/hive-site.xml |   4 +
 data/conf/llap/hive-site.xml|   4 +
 data/conf/perf-reg/hive-site.xml|   5 +
 data/conf/tez/hive-site.xml |   5 +
 .../upgrade/derby/044-HIVE-16997.derby.sql  |   1 +
 .../upgrade/derby/hive-schema-3.0.0.derby.sql   |   2 +-
 .../derby/upgrade-2.3.0-to-3.0.0.derby.sql  |   1 +
 .../upgrade/mssql/029-HIVE-16997.mssql.sql  |   1 +
 .../upgrade/mssql/hive-schema-3.0.0.mssql.sql   |   1 +
 .../mssql/upgrade-2.3.0-to-3.0.0.mssql.sql  |   1 +
 .../upgrade/mysql/044-HIVE-16997.mysql.sql  |   1 +
 .../upgrade/mysql/hive-schema-3.0.0.mysql.sql   |   1 +
 .../mysql/upgrade-2.3.0-to-3.0.0.mysql.sql  |   1 +
 .../upgrade/oracle/044-HIVE-16997.oracle.sql|   1 +
 .../upgrade/oracle/hive-schema-3.0.0.oracle.sql |   1 +
 .../oracle/upgrade-2.3.0-to-3.0.0.oracle.sql|   1 +
 .../postgres/043-HIVE-16997.postgres.sql|   1 +
 .../postgres/hive-schema-3.0.0.postgres.sql |   1 +
 .../upgrade-2.3.0-to-3.0.0.postgres.sql |   1 +
 .../hive/metastore/MetaStoreDirectSql.java  |  98 ++-
 .../hadoop/hive/metastore/MetaStoreUtils.java   |  49 +-
 .../hadoop/hive/metastore/ObjectStore.java  |  20 +-
 .../hive/metastore/StatObjectConverter.java |  40 +-
 .../hive/metastore/cache/CachedStore.java   |  50 +-
 .../aggr/DateColumnStatsAggregator.java | 358 +
 .../aggr/StringColumnStatsAggregator.java   | 303 
 .../hadoop/hive/metastore/hbase/StatsCache.java |  11 +-
 .../stats/BinaryColumnStatsAggregator.java  |   2 +-
 .../stats/BooleanColumnStatsAggregator.java |   2 +-
 .../hbase/stats/ColumnStatsAggregator.java  |   4 +-
 .../stats/ColumnStatsAggregatorFactory.java |  14 +-
 .../stats/DecimalColumnStatsAggregator.java |  35 +-
 .../stats/DoubleColumnStatsAggregator.java  |  33 +-
 .../hbase/stats/IExtrapolatePartStatus.java |   2 +-
 .../hbase/stats/LongColumnStatsAggregator.java  |  34 +-
 .../stats/StringColumnStatsAggregator.java  | 122 ---
 .../stats/merge/BinaryColumnStatsMerger.java|   2 +-
 .../stats/merge/BooleanColumnStatsMerger.java   |   2 +-
 .../hbase/stats/merge/ColumnStatsMerger.java|   2 +-
 .../stats/merge/ColumnStatsMergerFactory.java   |   2 +-
 .../stats/merge/DateColumnStatsMerger.java  |   2 +-
 .../stats/merge/DecimalColumnStatsMerger.java   |   2 +-
 .../stats/merge/DoubleColumnStatsMerger.java|   2 +-
 .../stats/merge/LongColumnStatsMerger.java  |   2 +-
 .../stats/merge/StringColumnStatsMerger.java|   2 +-
 .../model/MPartitionColumnStatistics.java   |  24 +-
 .../metastore/model/MTableColumnStatistics.java |  24 +-
 metastore/src/model/package.jdo |   6 +
 .../hadoop/hive/metastore/TestOldSchema.java| 229 ++
 .../hive/metastore/cache/TestCachedStore.java   | 156 
 ...stHBaseAggregateStatsCacheWithBitVector.java |  13 +-
 .../TestHBaseAggregateStatsExtrapolation.java   |  11 +-
 .../TestHBaseAggregateStatsNDVUniformDist.java  |  15 +-
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |   2 +-
 .../formatting/MetaDataFormatUtils.java |  19 +-
 .../hadoop/hive/ql/plan/ColStatistics.java  |   3 -
 .../hadoop/hive/ql/plan/DescTableDesc.java  |   4 +-
 .../ql/udf/generic/GenericUDAFComputeStats.java |   2 +-
 ...lter_table_update_status_disable_bitvector.q | 139 
 ql/src/test/queries/clientpositive/bitvector.q  |   3 +
 ql/src/test/queries/clientpositive/fm-sketch.q  |  58 ++
 ql/src/test/queries/clientpositive/hll.q|  11 +
 .../clientpositive/alterColumnStats.q.out   |  16 +-
 .../clientpositive/alterColumnStatsPart.q.out   |   6 +-
 .../alter_partition_update_status.q.out |  40 +-
 .../alter_table_column_stats.q.out  | 764 +++
 .../alter_table_update_status.q.out | 394 +-
 ..._table_update_status_disable_bitvector.q.out | 708 +

[09/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
--
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
new file mode 100644
index 0000000..54828f2
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.FileMetadataExprType;
+import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
+import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestOldSchema {
+  private ObjectStore store = null;
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestOldSchema.class.getName());
+
+  public static class MockPartitionExpressionProxy implements PartitionExpressionProxy {
+@Override
+public String convertExprToFilter(byte[] expr) throws MetaException {
+  return null;
+}
+
+@Override
+public boolean filterPartitionsByExpr(List partColumnNames,
+List partColumnTypeInfos, byte[] expr, String defaultPartitionName,
+List partitionNames) throws MetaException {
+  return false;
+}
+
+@Override
+public FileMetadataExprType getMetadataType(String inputFormat) {
+  return null;
+}
+
+@Override
+public SearchArgument createSarg(byte[] expr) {
+  return null;
+}
+
+@Override
+public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) {
+  return null;
+}
+  }
+
+  String bitVectors[] = new String[2];
+
+  @Before
+  public void setUp() throws Exception {
+HiveConf conf = new HiveConf();
+conf.setVar(HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS,
+MockPartitionExpressionProxy.class.getName());
+conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR, false);
+
+store = new ObjectStore();
+store.setConf(conf);
+dropAllStoreObjects(store);
+
+HyperLogLog hll = HyperLogLog.builder().build();
+hll.addLong(1);
+bitVectors[1] = hll.serialize();
+hll = HyperLogLog.builder().build();
+hll.addLong(2);
+hll.addLong(3);
+hll.addLong(3);
+hll.addLong(4);
+bitVectors[0] = hll.serialize();
+  }
+
+  @Aft

[03/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out 
b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
index b5f4fee..b6aedc4 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
@@ -89,18 +89,20 @@ PREHOOK: Input: default@loc_orc_1d
 POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-state  string  
0   3   0.75
2   
from deserializer   
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+state  string  
0   3   0.75
2   
SExM4AMDgaTbFcD8mOYCwMOJoQQ= 
+   from deserializer   
 
 PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@loc_orc_1d
 POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@loc_orc_1d
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-state  string  
0   6   3.0 
3   
from deserializer   
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+state  string  
0   6   3.0 
3   
SExM4AYGhJ2RPL68foHA90C/kJJjgJX39QKAwfg7 
+   from deserializer   
 
 PREHOOK: query: explain extended select state from loc_orc_1d
 PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select state from loc_orc_1d
@@ -296,12 +298,12 @@ STAGE PLANS:
   Processor Tree:
 TableScan
   alias: loc_orc_1d
-  Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE 
Column stats: PARTIAL
+  Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE 
Column stats: PARTIAL
   GatherStats: false
   Select Operator
 expressions: state (type: string)
 outputColumnNames: _col0
-Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE 
Column stats: PARTIAL
+Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE 
Column stats: PARTIAL
 ListSink
 
 PREHOOK: query: explain extended select state,locid from loc_orc_1d
@@ -499,12 +501,12 @@ STAGE PLANS:
   Processor Tree:
 TableScan
   alias: loc_orc_1d
-  Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE 
Column stats: PARTIAL
+  Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE 
Column stats: PARTIAL
   GatherStats: false

[07/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_update_status.q.out
--
diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out 
b/ql/src/test/results/clientpositive/alter_table_update_status.q.out
index 9cd9a8d..f23ba57 100644
--- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out
@@ -46,10 +46,12 @@ PREHOOK: Input: default@src_stat
 POSTHOOK: query: describe formatted src_stat key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-keystring  
0   16  1.72
3   
from deserializer   
-COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}  
 
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+keystring  
0   16  1.72
3   
SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
 
+myn/wfcugOGjfsCYzA0=   
 
+   from deserializer   
 
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}  
 
 PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET 
('numDVs'='','avgColLen'='1.111')
 PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
 POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET 
('numDVs'='','avgColLen'='1.111')
@@ -60,10 +62,12 @@ PREHOOK: Input: default@src_stat
 POSTHOOK: query: describe formatted src_stat key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-keystring  
0   1.111   
3   
from deserializer   
-COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}  
 
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+keystring  
0   1.111   
3   
SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV
 
+myn/wfcugOGjfsCYzA0=   
 
+   from deserializer   
 
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}  
 
 PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET 
('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124')
 PREHOOK: type: ALTERTABL

[01/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 892841a46 -> f8b79fe6d


http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
--
diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
index 19546c3..893aea3 100644
--- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
+++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
@@ -162,18 +162,20 @@ PREHOOK: Input: default@ex_table
 POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@ex_table
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-keyint 0   9   
0   6   

from deserializer   
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+keyint 0   9   
0   6   

SExM4AYGxdOOGLy91N8BwJKLAcGuwk7AqvwN/4Sz5AE= 
+   from deserializer   
 
 PREHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@ex_table
 POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@ex_table
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-value  string  
0   6   5.0 
5   
from deserializer   
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+value  string  
0   6   5.0 
5   
SExM4AYGwZXdyQGC2MSsAcCIiJQBvtSupwHDnsmSAr36nzs= 
+   from deserializer   
 
 PREHOOK: query: ALTER TABLE ex_table PARTITION (part='part1') RENAME TO 
PARTITION (part='part2')
 PREHOOK: type: ALTERTABLE_RENAMEPART
 PREHOOK: Input: default@ex_table
@@ -310,15 +312,17 @@ PREHOOK: Input: default@ex_table
 POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part2') key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@ex_table
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-keyint 0   9   
0   6   

from deserializer   
+# col_name data_type   min max 
num_nulls  

[05/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
--
diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out 
b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
index 0f28225..ca1ec00 100644
--- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
+++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
@@ -41,20 +41,22 @@ PREHOOK: Input: default@all_nulls
 POSTHOOK: query: describe formatted all_nulls a
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@all_nulls
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-a  bigint  0   0   
5   0   

from deserializer   
-COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
   
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+a  bigint  0   0   
5   1   

SExM4AEA 
+   from deserializer   
 
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
   
 PREHOOK: query: describe formatted all_nulls b
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@all_nulls
 POSTHOOK: query: describe formatted all_nulls b
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@all_nulls
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-b  double  0.0 0.0 
5   0   

from deserializer   
-COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
   
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+b  double  0.0 0.0 
5   1   

SExM4AEA 
+   from deserializer   
 
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
   
 PREHOOK: query: drop table all_nulls
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@all_nulls

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out
 
b/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out
index fb833bc..74085bf 100644
--- 
a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces

[08/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_column_stats.q.out
--
diff --git a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out 
b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out
index 96dce1e..3676204 100644
--- a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out
@@ -123,30 +123,33 @@ PREHOOK: Input: statsdb1@testtable0
 POSTHOOK: query: describe formatted statsdb1.testtable0 col1
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-col1   int 27  484 
0   10  

from deserializer   
-COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
  
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+col1   int 27  484 
0   10  

SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== 
+   from deserializer   
 
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
  
 PREHOOK: query: describe formatted statsdb1.testtable0 col2
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testtable0
 POSTHOOK: query: describe formatted statsdb1.testtable0 col2
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-col2   string  
0   10  6.7 
7   
from deserializer   
-COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
  
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+col2   string  
0   10  6.7 
7   
SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== 
+   from deserializer   
 
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}
  
 PREHOOK: query: describe formatted statsdb1.testtable0 col3
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testtable0
 POSTHOOK: query: describe formatted statsdb1.testtable0 col3
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testtable0
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
 

[10/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
--
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
new file mode 100644
index 0000000..6fae3e5
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
@@ -0,0 +1,358 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.columnstats.aggr;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Date;
+import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
+IExtrapolatePartStatus {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(DateColumnStatsAggregator.class);
+
+  @Override
+  public ColumnStatisticsObj aggregate(String colName, List partNames,
+  List css) throws MetaException {
+ColumnStatisticsObj statsObj = null;
+
+// check if all the ColumnStatisticsObjs contain stats and all the ndv are
+// bitvectors
+boolean doAllPartitionContainStats = partNames.size() == css.size();
+LOG.debug("doAllPartitionContainStats for " + colName + " is " + 
doAllPartitionContainStats);
+NumDistinctValueEstimator ndvEstimator = null;
+String colType = null;
+for (ColumnStatistics cs : css) {
+  if (cs.getStatsObjSize() != 1) {
+throw new MetaException(
+"The number of columns should be exactly one in aggrStats, but 
found "
++ cs.getStatsObjSize());
+  }
+  ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+  if (statsObj == null) {
+colType = cso.getColType();
+statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, 
colType, cso
+.getStatsData().getSetField());
+  }
+  if (!cso.getStatsData().getDateStats().isSetBitVectors()
+  || cso.getStatsData().getDateStats().getBitVectors().length() == 0) {
+ndvEstimator = null;
+break;
+  } else {
+// check if all of the bit vectors can merge
+NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory
+
.getNumDistinctValueEstimator(cso.getStatsData().getDateStats().getBitVectors());
+if (ndvEstimator == null) {
+  ndvEstimator = estimator;
+} else {
+  if (ndvEstimator.canMerge(estimator)) {
+continue;
+  } else {
+ndvEstimator = null;
+break;
+  }
+}
+  }
+}
+if (ndvEstimator != null) {
+  ndvEstimator = NumDistinctValueEstimatorFactory
+  .getEmptyNumDistinctValueEstimator(ndvEstimator);
+}
+LOG.debug("all of the bit vectors can merge for " + colName + " is " + 
(ndvEstimator != null));
+ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
+if (doAllPartitionContainStats || css.size() < 2) {
+  DateColumnStatsData aggregateData = null;
+  long lowerBound = 0;
+  long higherBound = 0;
+  double densityAvgSum = 0.0;
+  for (ColumnStatistics cs : css) {
+ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
+DateColumnStatsData newData = cso.getStatsData().getDateStats();
+lowerBound = Math.max(lowe

[02/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out 
b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
index 5e64743..20e59a3 100644
--- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
+++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
@@ -80,36 +80,40 @@ PREHOOK: Input: default@partcolstats
 POSTHOOK: query: describe formatted partcolstats partition (ds=date 
'2015-04-02', hr=2, part='partA') key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@partcolstats
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-keyint 27  484 
0   20  

from deserializer   
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+keyint 27  484 
0   20  

SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De
 
+jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6   
 
+   from deserializer   
 
 PREHOOK: query: describe formatted partcolstats partition (ds=date 
'2015-04-02', hr=2, part='partA') value
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@partcolstats
 POSTHOOK: query: describe formatted partcolstats partition (ds=date 
'2015-04-02', hr=2, part='partA') value
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@partcolstats
-# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
-   
 
-value  string  
0   20  6.8 
7   
from deserializer   
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+value  string  
0   20  6.8 
7   
SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL
 
+WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg==   
 
+   from deserializer   
 
 PREHOOK: query: describe formatted partcolstats partition (ds=date 
'2015-04-02', hr=2, part='partB') key
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@partcolstats
 POSTHOOK: query: describe formatted partcolstats partition (ds=date 
'2015-04-02', hr=2, part='partB') key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@partcolstats
-# col_name data_type   comment 
 
-   
 
-keyint from deserializer   
 
+# col_name data_type   comment   

[06/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2017-07-25 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
 
b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
new file mode 100644
index 000..1dcc1fc
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out
@@ -0,0 +1,708 @@
+PREHOOK: query: create table src_stat as select * from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_stat
+POSTHOOK: query: create table src_stat as select * from src1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_stat
+POSTHOOK: Lineage: src_stat.key SIMPLE [(src1)src1.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: src_stat.value SIMPLE [(src1)src1.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: create table src_stat_int (
+  key double,
+  value   string
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_stat_int
+POSTHOOK: query: create table src_stat_int (
+  key double,
+  value   string
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_stat_int
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE 
src_stat_int
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@src_stat_int
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE 
src_stat_int
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@src_stat_int
+PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_stat
+ A masked pattern was here 
+POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_stat
+ A masked pattern was here 
+PREHOOK: query: describe formatted src_stat key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat
+POSTHOOK: query: describe formatted src_stat key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+keystring  
0   16  1.72
3   
from deserializer   
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}  
 
+PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET 
('numDVs'='','avgColLen'='1.111')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET 
('numDVs'='','avgColLen'='1.111')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: query: describe formatted src_stat key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_stat
+POSTHOOK: query: describe formatted src_stat key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_stat
+# col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment bitVector   
+   
 
+keystring  
0   1.111   
3   
from deserializer   
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}  
 
+PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET 
('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET

[2/2] hive git commit: HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) ADDENDUM)

2017-07-25 Thread sershe
HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal 
Vijayaraghavan) ADDENDUM)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/892841a4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/892841a4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/892841a4

Branch: refs/heads/master
Commit: 892841a46e3d3026d075b0af85c82196d7bf7f40
Parents: 20276d2
Author: sergey 
Authored: Tue Jul 25 13:56:27 2017 -0700
Committer: sergey 
Committed: Tue Jul 25 13:56:49 2017 -0700

--
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/892841a4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index cf80a6c..dd9ad71 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3003,7 +3003,7 @@ public class HiveConf extends Configuration {
 LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb",
 new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false),
 "The buffer size for a per-fragment LLAP debug trace. 0 to disable."),
-LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", true, // TODO#
+LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", false,
 "Whether to always dump the LLAP IO trace (if enabled); the default is 
on error."),
 
LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", 
true,
 "Whether the LLAP IO layer is enabled for non-vectorized queries that 
read inputs\n" +



[1/2] hive git commit: HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) ADDENDUM)

2017-07-25 Thread sershe
Repository: hive
Updated Branches:
  refs/heads/branch-2 288b5bfe6 -> 726f270a6
  refs/heads/master 20276d211 -> 892841a46


HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal 
Vijayaraghavan) ADDENDUM)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/726f270a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/726f270a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/726f270a

Branch: refs/heads/branch-2
Commit: 726f270a6e5c720a98ac58f2c4a549e70b45fbad
Parents: 288b5bf
Author: sergey 
Authored: Tue Jul 25 13:56:27 2017 -0700
Committer: sergey 
Committed: Tue Jul 25 13:56:27 2017 -0700

--
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/726f270a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 91c2552..30506b0 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2900,7 +2900,7 @@ public class HiveConf extends Configuration {
 LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb",
 new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false),
 "The buffer size for a per-fragment LLAP debug trace. 0 to disable."),
-LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", true, // TODO#
+LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", false,
 "Whether to always dump the LLAP IO trace (if enabled); the default is 
on error."),
 
LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", 
true,
 "Whether the LLAP IO layer is enabled for non-vectorized queries that 
read inputs\n" +



hive git commit: HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)

2017-07-25 Thread sershe
Repository: hive
Updated Branches:
  refs/heads/branch-2 c37fdf96e -> 288b5bfe6


HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal 
Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/288b5bfe
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/288b5bfe
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/288b5bfe

Branch: refs/heads/branch-2
Commit: 288b5bfe62405d93513625c2732c7ef5f80c9d43
Parents: c37fdf9
Author: sergey 
Authored: Tue Jul 25 12:50:25 2017 -0700
Committer: sergey 
Committed: Tue Jul 25 12:50:25 2017 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   5 +
 .../hive/llap/io/api/impl/LlapIoImpl.java   |   7 +-
 .../llap/io/decode/EncodedDataConsumer.java |   1 +
 .../io/decode/GenericColumnVectorProducer.java  |   9 +-
 .../llap/io/decode/OrcColumnVectorProducer.java |  15 +-
 .../llap/io/decode/OrcEncodedDataConsumer.java  |  12 +
 .../llap/io/encoded/OrcEncodedDataReader.java   |  57 ++-
 .../ql/io/orc/encoded/EncodedReaderImpl.java| 102 ++--
 .../hadoop/hive/ql/io/orc/encoded/IoTrace.java  | 492 +++
 .../hadoop/hive/ql/io/orc/encoded/Reader.java   |   2 +-
 .../hive/ql/io/orc/encoded/ReaderImpl.java  |   6 +-
 .../io/orc/encoded/TestEncodedReaderImpl.java   |  14 +-
 12 files changed, 655 insertions(+), 67 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/288b5bfe/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 365e3e7..91c2552 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2897,6 +2897,11 @@ public class HiveConf extends Configuration {
 -1f, "The customized fraction of JVM memory which Tez will reserve for 
the processor"),
 // The default is different on the client and server, so it's null here.
 LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer 
is enabled."),
+LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb",
+new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false),
+"The buffer size for a per-fragment LLAP debug trace. 0 to disable."),
+LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", true, // TODO#
+"Whether to always dump the LLAP IO trace (if enabled); the default is 
on error."),
 
LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", 
true,
 "Whether the LLAP IO layer is enabled for non-vectorized queries that 
read inputs\n" +
 "that can be vectorized"),

http://git-wip-us.apache.org/repos/asf/hive/blob/288b5bfe/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
--
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
index 5a86114..daf7101 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
@@ -61,10 +61,12 @@ import 
org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics;
 import org.apache.hadoop.hive.llap.metrics.MetricsUtils;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.encoded.IoTrace;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.metrics2.util.MBeans;
+import org.apache.hive.common.util.FixedSizedObjectPool;
 
 import com.google.common.primitives.Ints;
 import com.google.common.util.concurrent.ListeningExecutorService;
@@ -183,10 +185,11 @@ public class LlapIoImpl implements 
LlapIo {
 new LinkedBlockingQueue(),
 new 
ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build());
 // TODO: this should depends on input format and be in a map, or something.
+FixedSizedObjectPool tracePool = IoTrace.createTracePool(conf);
 this.orcCvp = new OrcColumnVectorProducer(
-metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics);
+metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics, 
tracePool);
 this.genericCvp = isEncodeEnabled ? new GenericColumnVectorProducer(
-serdeCache, bufferManager, conf, cacheMetrics, ioMetrics) : null;
+serdeCache, bufferManager, conf, c

hive git commit: HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)

2017-07-25 Thread sershe
Repository: hive
Updated Branches:
  refs/heads/master 4af249581 -> 20276d211


HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal 
Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/20276d21
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/20276d21
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/20276d21

Branch: refs/heads/master
Commit: 20276d2113f669a2ea08480ce76df9bd6b913d09
Parents: 4af2495
Author: sergey 
Authored: Tue Jul 25 12:24:54 2017 -0700
Committer: sergey 
Committed: Tue Jul 25 12:24:54 2017 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   5 +
 .../hive/llap/io/api/impl/LlapIoImpl.java   |   8 +-
 .../llap/io/decode/EncodedDataConsumer.java |   1 +
 .../io/decode/GenericColumnVectorProducer.java  |   9 +-
 .../llap/io/decode/OrcColumnVectorProducer.java |  15 +-
 .../llap/io/decode/OrcEncodedDataConsumer.java  |  12 +
 .../llap/io/encoded/OrcEncodedDataReader.java   |  56 ++-
 .../ql/io/orc/encoded/EncodedReaderImpl.java| 102 ++--
 .../hadoop/hive/ql/io/orc/encoded/IoTrace.java  | 478 +++
 .../hadoop/hive/ql/io/orc/encoded/Reader.java   |   2 +-
 .../hive/ql/io/orc/encoded/ReaderImpl.java  |   6 +-
 .../io/orc/encoded/TestEncodedReaderImpl.java   |  14 +-
 12 files changed, 641 insertions(+), 67 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/20276d21/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f5e5974..cf80a6c 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3000,6 +3000,11 @@ public class HiveConf extends Configuration {
 -1f, "The customized fraction of JVM memory which Tez will reserve for 
the processor"),
 // The default is different on the client and server, so it's null here.
 LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer 
is enabled."),
+LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb",
+new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false),
+"The buffer size for a per-fragment LLAP debug trace. 0 to disable."),
+LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", true, // TODO#
+"Whether to always dump the LLAP IO trace (if enabled); the default is 
on error."),
 
LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", 
true,
 "Whether the LLAP IO layer is enabled for non-vectorized queries that 
read inputs\n" +
 "that can be vectorized"),

http://git-wip-us.apache.org/repos/asf/hive/blob/20276d21/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
--
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
index 53c9bae..35b9d1f 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
@@ -58,11 +58,14 @@ import 
org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics;
 import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics;
 import org.apache.hadoop.hive.llap.metrics.MetricsUtils;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.orc.encoded.IoTrace;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.metrics2.util.MBeans;
+import org.apache.hive.common.util.FixedSizedObjectPool;
+
 
 import com.google.common.primitives.Ints;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
@@ -184,11 +187,12 @@ public class LlapIoImpl implements 
LlapIo {
 0L, TimeUnit.MILLISECONDS,
 new LinkedBlockingQueue(),
 new 
ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build());
+FixedSizedObjectPool tracePool = IoTrace.createTracePool(conf);
 // TODO: this should depends on input format and be in a map, or something.
 this.orcCvp = new OrcColumnVectorProducer(
-metadataCache, cache, bufferManagerOrc, conf, cacheMetrics, ioMetrics);
+metadataCache, cache, bufferManagerOrc, conf, cacheMetrics, ioMetrics, 
tracePool);
 this.genericCvp = isEncodeEnabled ? new GenericColumnVectorProducer(
-serde

svn commit: r1015963 - in /websites/production/hive/content/javadocs/r2.2.0: ./ api/ api/org/ api/org/apache/ api/org/apache/hadoop/ api/org/apache/hadoop/fs/ api/org/apache/hadoop/fs/class-use/ api/o

2017-07-25 Thread omalley
Author: omalley
Date: Tue Jul 25 19:20:24 2017
New Revision: 1015963

Log:
Adding javadoc for 2.2.0


[This commit notification would consist of 4097 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]


hive git commit: Preparing for 2.2.1 development.

2017-07-25 Thread omalley
Repository: hive
Updated Branches:
  refs/heads/branch-2.2 da840b0f8 -> 1ed1f2807


Preparing for 2.2.1 development.

Signed-off-by: Owen O'Malley 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ed1f280
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ed1f280
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ed1f280

Branch: refs/heads/branch-2.2
Commit: 1ed1f2807d411bf14357750adf4a57d6d7bfb62a
Parents: da840b0
Author: Owen O'Malley 
Authored: Tue Jul 25 10:46:35 2017 -0700
Committer: Owen O'Malley 
Committed: Tue Jul 25 10:46:35 2017 -0700

--
 accumulo-handler/pom.xml| 2 +-
 ant/pom.xml | 2 +-
 beeline/pom.xml | 2 +-
 cli/pom.xml | 2 +-
 common/pom.xml  | 2 +-
 contrib/pom.xml | 2 +-
 druid-handler/pom.xml   | 2 +-
 hbase-handler/pom.xml   | 2 +-
 hcatalog/core/pom.xml   | 2 +-
 hcatalog/hcatalog-pig-adapter/pom.xml   | 2 +-
 hcatalog/pom.xml| 2 +-
 hcatalog/server-extensions/pom.xml  | 2 +-
 hcatalog/streaming/pom.xml  | 2 +-
 hcatalog/webhcat/java-client/pom.xml| 2 +-
 hcatalog/webhcat/svr/pom.xml| 2 +-
 hplsql/pom.xml  | 2 +-
 hwi/pom.xml | 2 +-
 itests/custom-serde/pom.xml | 2 +-
 itests/custom-udfs/pom.xml  | 2 +-
 itests/custom-udfs/udf-classloader-udf1/pom.xml | 2 +-
 itests/custom-udfs/udf-classloader-udf2/pom.xml | 2 +-
 itests/custom-udfs/udf-classloader-util/pom.xml | 2 +-
 itests/hcatalog-unit/pom.xml| 2 +-
 itests/hive-jmh/pom.xml | 2 +-
 itests/hive-minikdc/pom.xml | 2 +-
 itests/hive-unit-hadoop2/pom.xml| 2 +-
 itests/hive-unit/pom.xml| 2 +-
 itests/pom.xml  | 2 +-
 itests/qtest-accumulo/pom.xml   | 2 +-
 itests/qtest-spark/pom.xml  | 2 +-
 itests/qtest/pom.xml| 2 +-
 itests/test-serde/pom.xml   | 2 +-
 itests/util/pom.xml | 2 +-
 jdbc/pom.xml| 2 +-
 llap-client/pom.xml | 2 +-
 llap-common/pom.xml | 2 +-
 llap-ext-client/pom.xml | 2 +-
 llap-server/pom.xml | 2 +-
 llap-tez/pom.xml| 2 +-
 metastore/pom.xml   | 2 +-
 orc/pom.xml | 2 +-
 packaging/pom.xml   | 2 +-
 pom.xml | 2 +-
 ql/pom.xml  | 2 +-
 serde/pom.xml   | 2 +-
 service-rpc/pom.xml | 2 +-
 service/pom.xml | 2 +-
 shims/0.23/pom.xml  | 2 +-
 shims/aggregator/pom.xml| 2 +-
 shims/common/pom.xml| 2 +-
 shims/pom.xml   | 2 +-
 shims/scheduler/pom.xml | 2 +-
 spark-client/pom.xml| 4 ++--
 testutils/pom.xml   | 2 +-
 54 files changed, 55 insertions(+), 55 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1ed1f280/accumulo-handler/pom.xml
--
diff --git a/accumulo-handler/pom.xml b/accumulo-handler/pom.xml
index 210f441..6105ad8 100644
--- a/accumulo-handler/pom.xml
+++ b/accumulo-handler/pom.xml
@@ -19,7 +19,7 @@
   
 org.apache.hive
 hive
-2.2.0
+2.2.1-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/hive/blob/1ed1f280/ant/pom.xml
--
diff --git a/ant/pom.xml b/ant/pom.xml
index 3236797..3140b82 100644
--- a/ant/pom.xml
+++ b/ant/pom.xml
@@ -19,7 +19,7 @@
   
 org.apache.hive
 hive
-2.2.0
+2.2.1-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/hive/blob/1ed1f280/beeline/pom.xml
--
diff --git a/beeline/pom.xml b/beeline/pom.xml
index 20483f7..667a152 100644
--- a/beeline/pom.xml
+++ b/beeline/pom.xml
@@ -19,7 +19,7 @@
   
 org.apache.hive
 hive
-2.2.0
+2.2.1-SNAPSHOT
 ../pom.xml
   
 

svn commit: r20629 - in /release/hive/hive-2.2.0: ./ apache-hive-2.2.0-bin.tar.gz apache-hive-2.2.0-bin.tar.gz.asc apache-hive-2.2.0-bin.tar.gz.sha256 apache-hive-2.2.0-src.tar.gz apache-hive-2.2.0-sr

2017-07-25 Thread omalley
Author: omalley
Date: Tue Jul 25 17:30:33 2017
New Revision: 20629

Log:
Apache Hive 2.2.0 release

Added:
release/hive/hive-2.2.0/
release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz   (with props)
release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.asc   (with props)
release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.sha256
release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz   (with props)
release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.asc   (with props)
release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.sha256

Added: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz
==
Binary file - no diff available.

Propchange: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz
--
svn:mime-type = application/x-gzip

Added: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.asc
==
Binary file - no diff available.

Propchange: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.asc
--
svn:mime-type = application/pgp-signature

Added: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.sha256
==
--- release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.sha256 (added)
+++ release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.sha256 Tue Jul 25 
17:30:33 2017
@@ -0,0 +1 @@
+ad75ce71191760c931c3b814120ba1c8b7fe46399bb5a173cf05560e32f7458f  
apache-hive-2.2.0-bin.tar.gz

Added: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz
==
Binary file - no diff available.

Propchange: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz
--
svn:mime-type = application/x-gzip

Added: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.asc
==
Binary file - no diff available.

Propchange: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.asc
--
svn:mime-type = application/pgp-signature

Added: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.sha256
==
--- release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.sha256 (added)
+++ release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.sha256 Tue Jul 25 
17:30:33 2017
@@ -0,0 +1 @@
+8e086119a8076a60d08026ef4f0c7cb878814425417596ddd3f301a12ec8238c  
hive-2.2.0rc1.tar.gz




hive git commit: HIVE-17155: findConfFile() in HiveConf.java has some issues with the conf path (Aihua Xu, reviewed by Yongzhi Chen)

2017-07-25 Thread aihuaxu
Repository: hive
Updated Branches:
  refs/heads/master 3c7fb2a97 -> 4af249581


HIVE-17155: findConfFile() in HiveConf.java has some issues with the conf path 
(Aihua Xu, reviewed by Yongzhi Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4af24958
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4af24958
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4af24958

Branch: refs/heads/master
Commit: 4af249581a4a25bd92c9c2dd9a09e590a0cf2831
Parents: 3c7fb2a
Author: Aihua Xu 
Authored: Tue Jul 25 08:14:40 2017 -0700
Committer: Aihua Xu 
Committed: Tue Jul 25 08:14:40 2017 -0700

--
 .../src/java/org/apache/hadoop/hive/conf/HiveConf.java  | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/4af24958/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 3cf76d0..f5e5974 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -161,19 +161,25 @@ public class HiveConf extends Configuration {
   result = checkConfigFile(new File(confPath, name));
   if (result == null) {
 String homePath = System.getenv("HIVE_HOME");
-String nameInConf = "conf" + File.pathSeparator + name;
+String nameInConf = "conf" + File.separator + name;
 result = checkConfigFile(new File(homePath, nameInConf));
 if (result == null) {
   URI jarUri = null;
   try {
-jarUri = HiveConf.class.getProtectionDomain().getCodeSource().getLocation().toURI();
+// Handle both file:// and jar:!{entry} in the case of shaded hive libs
+URL sourceUrl = HiveConf.class.getProtectionDomain().getCodeSource().getLocation();
+jarUri = sourceUrl.getProtocol().equalsIgnoreCase("jar") ? new URI(sourceUrl.getPath()) : sourceUrl.toURI();
   } catch (Throwable e) {
 if (l4j.isInfoEnabled()) {
   l4j.info("Cannot get jar URI", e);
 }
 System.err.println("Cannot get jar URI: " + e.getMessage());
   }
-  result = checkConfigFile(new File(new File(jarUri).getParentFile(), nameInConf));
+  // From the jar file, the parent is /lib folder
+  File parent = new File(jarUri).getParentFile();
+  if (parent != null) {
+result = checkConfigFile(new File(parent.getParentFile(), nameInConf));
+  }
+  }
 }
   }
 }



[hive] Git Push Summary

2017-07-25 Thread omalley
Repository: hive
Updated Tags:  refs/tags/rel/release-2.2.0 [created] a585c2718


[hive] Git Push Summary

2017-07-25 Thread omalley
Repository: hive
Updated Tags:  refs/tags/release-2.2.0rc1 [deleted] da840b0f8


[hive] Git Push Summary

2017-07-25 Thread omalley
Repository: hive
Updated Tags:  refs/tags/release-2.3.0-rc1 [deleted] ac7a42c13


[hive] Git Push Summary

2017-07-25 Thread omalley
Repository: hive
Updated Tags:  refs/tags/release-2.3.0-rc0 [deleted] 498721024


[hive] Git Push Summary

2017-07-25 Thread omalley
Repository: hive
Updated Tags:  refs/tags/storage-release-2.4.0rc0 [deleted] 63d7b4b5f


[hive] Git Push Summary

2017-07-25 Thread omalley
Repository: hive
Updated Tags:  refs/tags/storage-release-2.4.0rc1 [deleted] 4b354124a


[hive] Git Push Summary

2017-07-25 Thread omalley
Repository: hive
Updated Tags:  refs/tags/storage-release-2.3.0rc0 [deleted] 30e6745a6


[2/3] hive git commit: HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran)

2017-07-25 Thread prasanthj
http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
index 60400de..99a4958 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
@@ -83,7 +83,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc  {
 }
   }
 
-  public static enum OperatorVariation {
+  public static enum VectorMapJoinVariation {
 NONE,
 INNER_BIG_ONLY,
 INNER,
@@ -94,7 +94,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc  {
   private HashTableImplementationType hashTableImplementationType;
   private HashTableKind hashTableKind;
   private HashTableKeyType hashTableKeyType;
-  private OperatorVariation operatorVariation;
+  private VectorMapJoinVariation vectorMapJoinVariation;
   private boolean minMaxEnabled;
 
   private VectorMapJoinInfo vectorMapJoinInfo;
@@ -103,7 +103,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc  {
 hashTableImplementationType = HashTableImplementationType.NONE;
 hashTableKind = HashTableKind.NONE;
 hashTableKeyType = HashTableKeyType.NONE;
-operatorVariation = OperatorVariation.NONE;
+vectorMapJoinVariation = VectorMapJoinVariation.NONE;
 minMaxEnabled = false;
 vectorMapJoinInfo = null;
   }
@@ -114,7 +114,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc  {
 clone.hashTableImplementationType = this.hashTableImplementationType;
 clone.hashTableKind = this.hashTableKind;
 clone.hashTableKeyType = this.hashTableKeyType;
-clone.operatorVariation = this.operatorVariation;
+clone.vectorMapJoinVariation = this.vectorMapJoinVariation;
 clone.minMaxEnabled = this.minMaxEnabled;
 if (vectorMapJoinInfo != null) {
   throw new RuntimeException("Cloning VectorMapJoinInfo not supported");
@@ -122,7 +122,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc  {
 return clone;
   }
 
-  public HashTableImplementationType hashTableImplementationType() {
+  public HashTableImplementationType getHashTableImplementationType() {
 return hashTableImplementationType;
   }
 
@@ -130,7 +130,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc  {
 this.hashTableImplementationType = hashTableImplementationType;
   }
 
-  public HashTableKind hashTableKind() {
+  public HashTableKind getHashTableKind() {
 return hashTableKind;
   }
 
@@ -138,7 +138,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc  {
 this.hashTableKind = hashTableKind;
   }
 
-  public HashTableKeyType hashTableKeyType() {
+  public HashTableKeyType getHashTableKeyType() {
 return hashTableKeyType;
   }
 
@@ -146,15 +146,15 @@ public class VectorMapJoinDesc extends AbstractVectorDesc  {
 this.hashTableKeyType = hashTableKeyType;
   }
 
-  public OperatorVariation operatorVariation() {
-return operatorVariation;
+  public VectorMapJoinVariation getVectorMapJoinVariation() {
+return vectorMapJoinVariation;
   }
 
-  public void setOperatorVariation(OperatorVariation operatorVariation) {
-this.operatorVariation = operatorVariation;
+  public void setVectorMapJoinVariation(VectorMapJoinVariation vectorMapJoinVariation) {
+this.vectorMapJoinVariation = vectorMapJoinVariation;
   }
 
-  public boolean minMaxEnabled() {
+  public boolean getMinMaxEnabled() {
 return minMaxEnabled;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/test/org/apache/hadoop/hive/ql/exec/util/DescriptionTest.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/util/DescriptionTest.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/util/DescriptionTest.java
new file mode 100644
index 0000000..bbdd4a3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/util/DescriptionTest.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */

[1/3] hive git commit: HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran)

2017-07-25 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/master 88da23829 -> 3c7fb2a97


http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
new file mode 100644
index 0000000..84103ec
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
@@ -0,0 +1,549 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
+import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator;
+import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator;
+import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator;
+import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator;
+import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase;
+import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator;
+import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects;
+import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream;
+import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator;
+import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType;
+import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.ap

[3/3] hive git commit: HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran)

2017-07-25 Thread prasanthj
HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3c7fb2a9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3c7fb2a9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3c7fb2a9

Branch: refs/heads/master
Commit: 3c7fb2a976ad363a2f96588d0c4880f682f0b855
Parents: 88da238
Author: Prasanth Jayachandran 
Authored: Tue Jul 25 02:49:01 2017 -0700
Committer: Prasanth Jayachandran 
Committed: Tue Jul 25 02:49:01 2017 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +
 itests/hive-jmh/pom.xml |   6 +
 .../vectorization/mapjoin/AbstractMapJoin.java  | 179 +
 .../mapjoin/MapJoinMultiKeyBench.java   | 313 +
 .../mapjoin/MapJoinMultiKeyBenchBase.java   |  68 ++
 .../mapjoin/MapJoinOneLongKeyBench.java | 313 +
 .../mapjoin/MapJoinOneLongKeyBenchBase.java |  66 ++
 .../mapjoin/MapJoinOneStringKeyBench.java   | 313 +
 .../mapjoin/MapJoinOneStringKeyBenchBase.java   |  66 ++
 .../hadoop/hive/ql/exec/MapJoinOperator.java|  18 +
 .../hive/ql/exec/spark/HashTableLoader.java |   2 +-
 .../hive/ql/exec/vector/VectorBatchDebug.java   | 105 +++
 .../mapjoin/VectorMapJoinCommonOperator.java|  35 +-
 .../fast/VectorMapJoinFastTableContainer.java   |   8 +-
 .../VectorMapJoinOptimizedCreateHashTable.java  |   6 +-
 .../hive/ql/optimizer/physical/Vectorizer.java  |  26 +-
 .../apache/hadoop/hive/ql/plan/MapJoinDesc.java |   6 +-
 .../hadoop/hive/ql/plan/VectorMapJoinDesc.java  |  24 +-
 .../hive/ql/exec/util/DescriptionTest.java  |  31 +
 .../CollectorTestOperator.java  |  47 ++
 .../CountCollectorTestOperator.java |  48 ++
 .../CountVectorCollectorTestOperator.java   |  47 ++
 .../RowCollectorTestOperator.java   |  52 ++
 .../RowCollectorTestOperatorBase.java   |  32 +
 .../RowVectorCollectorTestOperator.java |  63 ++
 .../ql/exec/util/rowobjects/RowTestObjects.java | 100 +++
 .../exec/util/rowobjects/RowTestObjectsMap.java |  77 +++
 .../util/rowobjects/RowTestObjectsMultiSet.java |  91 +++
 .../exec/vector/mapjoin/MapJoinTestConfig.java  | 658 +++
 .../ql/exec/vector/mapjoin/MapJoinTestData.java | 272 
 .../vector/mapjoin/MapJoinTestDescription.java  | 239 +++
 .../vector/mapjoin/TestMapJoinOperator.java | 549 
 .../util/batchgen/TestVectorBatchGenerate.java  |  57 ++
 .../batchgen/VectorBatchGenerateStream.java |  63 ++
 .../util/batchgen/VectorBatchGenerateUtil.java  |  92 +++
 .../util/batchgen/VectorBatchGenerator.java | 306 +
 .../batchgen/VectorColumnGroupGenerator.java| 494 ++
 37 files changed, 4834 insertions(+), 40 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 3ebe503..3cf76d0 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -560,6 +560,8 @@ public class HiveConf extends Configuration {
 
 HIVE_IN_TEZ_TEST("hive.in.tez.test", false, "internal use only, true when in testing tez",
 true),
+HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD("hive.mapjoin.testing.no.hash.table.load", false, "internal use only, true when in testing map join",
+true),
 
 LOCALMODEAUTO("hive.exec.mode.local.auto", false,
 "Let Hive determine whether to run in local mode automatically"),

http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/pom.xml
--
diff --git a/itests/hive-jmh/pom.xml b/itests/hive-jmh/pom.xml
index af8eb19..0ff584c 100644
--- a/itests/hive-jmh/pom.xml
+++ b/itests/hive-jmh/pom.xml
@@ -56,7 +56,13 @@
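     <dependency>
       <groupId>org.apache.hive</groupId>
       <artifactId>hive-exec</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-exec</artifactId>
       <version>${project.version}</version>
+      <classifier>tests</classifier>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>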

http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java
--
diff --git 
a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java
 
b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/Abstra