hive git commit: HIVE-17131: Add InterfaceAudience and InterfaceStability annotations for SerDe APIs (Sahil Takiar, reviewed by Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/branch-2 726f270a6 -> a9e5427e5 HIVE-17131: Add InterfaceAudience and InterfaceStability annotations for SerDe APIs (Sahil Takiar, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a9e5427e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a9e5427e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a9e5427e Branch: refs/heads/branch-2 Commit: a9e5427e5185e47ce2dc28130e475a7037406463 Parents: 726f270 Author: Sahil Takiar Authored: Tue Jul 25 21:19:24 2017 -0700 Committer: Sahil Takiar Committed: Tue Jul 25 21:21:06 2017 -0700 -- .../org/apache/hadoop/hive/serde2/AbstractDeserializer.java| 4 .../src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java | 4 .../java/org/apache/hadoop/hive/serde2/AbstractSerializer.java | 4 serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java | 4 .../src/java/org/apache/hadoop/hive/serde2/SerDeException.java | 6 +- serde/src/java/org/apache/hadoop/hive/serde2/SerDeStats.java | 5 + serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java | 4 .../hadoop/hive/serde2/objectinspector/ObjectInspector.java| 5 + 8 files changed, 35 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a9e5427e/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java -- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java index 869b86b..e850406 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractDeserializer.java @@ -21,6 +21,8 @@ package org.apache.hadoop.hive.serde2; import java.util.Properties; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import 
org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -29,6 +31,8 @@ import org.apache.hadoop.io.Writable; * new methods can be added in the underlying interface, Deserializer, and only implementations * that need those methods overwrite it. */ +@InterfaceAudience.Public +@InterfaceStability.Stable public abstract class AbstractDeserializer implements Deserializer { /** http://git-wip-us.apache.org/repos/asf/hive/blob/a9e5427e/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java -- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java index 049b35d..939d90b 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java @@ -22,6 +22,8 @@ import java.util.Map; import java.util.Properties; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -32,6 +34,8 @@ import javax.annotation.Nullable; * new methods can be added in the underlying interface, SerDe, and only implementations * that need those methods overwrite it. 
*/ +@InterfaceAudience.Public +@InterfaceStability.Stable public abstract class AbstractSerDe implements Deserializer, Serializer { protected String configErrors; http://git-wip-us.apache.org/repos/asf/hive/blob/a9e5427e/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java -- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java index 570b4bb..8b30056 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerializer.java @@ -21,6 +21,8 @@ package org.apache.hadoop.hive.serde2; import java.util.Properties; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -29,6 +31,8 @@ import org.apache.hadoop.io.Writable; * new methods can be added in
[04/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index 5593e42..559f05e 100644 --- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -14,10 +14,31 @@ PREHOOK: Input: default@src POSTHOOK: query: describe formatted src key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -keystring 0 309 2.812 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +keystring 0 309 2.812 3 SExM4LUCtQLBoliC580Fv5Nq/8NRwJtoxdPYB/yjmAO/kokBgcjcBMHblgH/lecCgMn9AcC78gm/ +wzeA/BHB6MgH/5KjAsKL1QqAge0D/obCAoCvMMGIDoDPDr/ovgSAn6ALgL7TEMGr8wX/6xnAuqgF +wcgagNoPwY+GAf7ouATEjJwBvIfKAoGetgS/ysoIwN4dwMHtA8DAnQmB7TWBspEFv+7uBMGB4wL+ +mmvAqtEBgrrQAYGlkQb9j1CBuYcE/5VDgNciweGkA4Hf9wS+ws4BgPdxgJr/BcCctQOB2rQD/+LE +CMDf/QHAklbAxaoBgP7gA4Gu1weBx4YB/remBMHjToDh2wH/+sEHwMqxA8DvyATBrUj/7ecCgP0H +ifmTBIC8FriHfv/5pArAlSzAnIoBwJ/bAsXdU7v+0g7ArvQBgOX+AoD5+hPA/4oHgK3rAcCWB4D+ +iwSA/soDgu71Ab68MYD/gQKAh+ECwN6xBMCTYoSh4AO9nESArIcGwYmeA/7TpAWBncQDwthzvrb9 +BIG2hQG+lbgIgan2DP/2R4Cu1giAlJcDg7UU/ZBfgchL/4O8AsDO8gKBodUGwMTcA8OKggP88sYD +w9tCvr+CA7/W3QfE05wB/dWUCcCa2QmBsY4HgrD3BbzCwguAih3B67gD/9zeAYX0twf7048HwLKS +CICJC8GyrgGB0akBvr2mAcDP8QGA0/ECwMScA4GXrAGBpokFgKgKwNSHBP7sMIDfkAbBpzy/pdcK +wLnfAYLeKP6R5wLCssoBv8JY/7IvwNEMgq+7AsDQH/6VwAGA8nXCisoBvvevAoH4PMKOqAO+lnj/ +yjnCwzL/7IADv/8jhK28Brzx1wGCmk6/0t0CgIyaAv/CnwXAzZ0BwJadA4GCKcC53AG/sUCBhs8J 
+gZmRAb6zwwSBgLYGweuIAb+tbsH6gwPAngH+ysQBhLxh/NihAsGYswS/l8MKgtFX/u6jA8XB6AL8 +3tAC/5TDBMHMvgG/0NICwO79BYS82Qa8oMQBwPqBBcHhI//N2RLAitsEgJnuBcD+qAXA/t0FwJ32 +CoDBrAHDlSD9ltIDwMieBIC26QLBzZkGgbecAYCvCf+Xgwn/40+JgRX3xNcBwIkLweaQAYDuggKB +2okBwPESvtvPBsHxpQL/ucsCwIeJA8CjBoH6SP+UnwKCw8IGv7mcBYHtywKChYICvKDpAsDx5gHC ++MIF/sDeBYCh1ALA+poCwYygCL+TTITO3AK9weMEwf+fAb/V5AKApKMGg8Fg/J9OwfnNCv/pkgLC +z+gEvt+XA4CqlgGAlOQDgIaICYGmzAGCiYUHv96HBv/njQHEzfsEv9LuB7zk1gPD4RiDo/UDu5qr +A//IIcDDHsCwDYDozwLEq9EBvZp1gLwawJaGAv/JwA
[11/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f8b79fe6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f8b79fe6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f8b79fe6 Branch: refs/heads/master Commit: f8b79fe6d136f348820ce81dc7a6883f1e70dcfc Parents: 892841a Author: Pengcheng Xiong Authored: Tue Jul 25 15:41:14 2017 -0700 Committer: Pengcheng Xiong Committed: Tue Jul 25 15:41:14 2017 -0700 -- .../apache/hadoop/hive/common/ndv/FMSketch.java | 117 +-- .../ndv/NumDistinctValueEstimatorFactory.java | 30 +- .../hive/common/ndv/fm/FMSketchUtils.java | 133 .../hadoop/hive/common/ndv/hll/HyperLogLog.java | 4 +- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +- .../ndv/fm/TestFMSketchSerialization.java | 97 +++ data/conf/hive-site.xml | 4 + data/conf/llap/hive-site.xml| 4 + data/conf/perf-reg/hive-site.xml| 5 + data/conf/tez/hive-site.xml | 5 + .../upgrade/derby/044-HIVE-16997.derby.sql | 1 + .../upgrade/derby/hive-schema-3.0.0.derby.sql | 2 +- .../derby/upgrade-2.3.0-to-3.0.0.derby.sql | 1 + .../upgrade/mssql/029-HIVE-16997.mssql.sql | 1 + .../upgrade/mssql/hive-schema-3.0.0.mssql.sql | 1 + .../mssql/upgrade-2.3.0-to-3.0.0.mssql.sql | 1 + .../upgrade/mysql/044-HIVE-16997.mysql.sql | 1 + .../upgrade/mysql/hive-schema-3.0.0.mysql.sql | 1 + .../mysql/upgrade-2.3.0-to-3.0.0.mysql.sql | 1 + .../upgrade/oracle/044-HIVE-16997.oracle.sql| 1 + .../upgrade/oracle/hive-schema-3.0.0.oracle.sql | 1 + .../oracle/upgrade-2.3.0-to-3.0.0.oracle.sql| 1 + .../postgres/043-HIVE-16997.postgres.sql| 1 + .../postgres/hive-schema-3.0.0.postgres.sql | 1 + .../upgrade-2.3.0-to-3.0.0.postgres.sql | 1 + .../hive/metastore/MetaStoreDirectSql.java | 98 ++- .../hadoop/hive/metastore/MetaStoreUtils.java | 49 +- .../hadoop/hive/metastore/ObjectStore.java | 20 +- .../hive/metastore/StatObjectConverter.java | 40 
+- .../hive/metastore/cache/CachedStore.java | 50 +- .../aggr/DateColumnStatsAggregator.java | 358 + .../aggr/StringColumnStatsAggregator.java | 303 .../hadoop/hive/metastore/hbase/StatsCache.java | 11 +- .../stats/BinaryColumnStatsAggregator.java | 2 +- .../stats/BooleanColumnStatsAggregator.java | 2 +- .../hbase/stats/ColumnStatsAggregator.java | 4 +- .../stats/ColumnStatsAggregatorFactory.java | 14 +- .../stats/DecimalColumnStatsAggregator.java | 35 +- .../stats/DoubleColumnStatsAggregator.java | 33 +- .../hbase/stats/IExtrapolatePartStatus.java | 2 +- .../hbase/stats/LongColumnStatsAggregator.java | 34 +- .../stats/StringColumnStatsAggregator.java | 122 --- .../stats/merge/BinaryColumnStatsMerger.java| 2 +- .../stats/merge/BooleanColumnStatsMerger.java | 2 +- .../hbase/stats/merge/ColumnStatsMerger.java| 2 +- .../stats/merge/ColumnStatsMergerFactory.java | 2 +- .../stats/merge/DateColumnStatsMerger.java | 2 +- .../stats/merge/DecimalColumnStatsMerger.java | 2 +- .../stats/merge/DoubleColumnStatsMerger.java| 2 +- .../stats/merge/LongColumnStatsMerger.java | 2 +- .../stats/merge/StringColumnStatsMerger.java| 2 +- .../model/MPartitionColumnStatistics.java | 24 +- .../metastore/model/MTableColumnStatistics.java | 24 +- metastore/src/model/package.jdo | 6 + .../hadoop/hive/metastore/TestOldSchema.java| 229 ++ .../hive/metastore/cache/TestCachedStore.java | 156 ...stHBaseAggregateStatsCacheWithBitVector.java | 13 +- .../TestHBaseAggregateStatsExtrapolation.java | 11 +- .../TestHBaseAggregateStatsNDVUniformDist.java | 15 +- .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 2 +- .../formatting/MetaDataFormatUtils.java | 19 +- .../hadoop/hive/ql/plan/ColStatistics.java | 3 - .../hadoop/hive/ql/plan/DescTableDesc.java | 4 +- .../ql/udf/generic/GenericUDAFComputeStats.java | 2 +- ...lter_table_update_status_disable_bitvector.q | 139 ql/src/test/queries/clientpositive/bitvector.q | 3 + ql/src/test/queries/clientpositive/fm-sketch.q | 58 ++ 
ql/src/test/queries/clientpositive/hll.q| 11 + .../clientpositive/alterColumnStats.q.out | 16 +- .../clientpositive/alterColumnStatsPart.q.out | 6 +- .../alter_partition_update_status.q.out | 40 +- .../alter_table_column_stats.q.out | 764 +++ .../alter_table_update_status.q.out | 394 +- ..._table_update_status_disable_bitvector.q.out | 708 +
[09/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java -- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java new file mode 100644 index 000..54828f2 --- /dev/null +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestOldSchema.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.AggrStats; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.FileMetadataExprType; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SQLForeignKey; +import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + 
+public class TestOldSchema { + private ObjectStore store = null; + + private static final Logger LOG = LoggerFactory.getLogger(TestOldSchema.class.getName()); + + public static class MockPartitionExpressionProxy implements PartitionExpressionProxy { +@Override +public String convertExprToFilter(byte[] expr) throws MetaException { + return null; +} + +@Override +public boolean filterPartitionsByExpr(List partColumnNames, +List partColumnTypeInfos, byte[] expr, String defaultPartitionName, +List partitionNames) throws MetaException { + return false; +} + +@Override +public FileMetadataExprType getMetadataType(String inputFormat) { + return null; +} + +@Override +public SearchArgument createSarg(byte[] expr) { + return null; +} + +@Override +public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) { + return null; +} + } + + String bitVectors[] = new String[2]; + + @Before + public void setUp() throws Exception { +HiveConf conf = new HiveConf(); +conf.setVar(HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS, +MockPartitionExpressionProxy.class.getName()); +conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR, false); + +store = new ObjectStore(); +store.setConf(conf); +dropAllStoreObjects(store); + +HyperLogLog hll = HyperLogLog.builder().build(); +hll.addLong(1); +bitVectors[1] = hll.serialize(); +hll = HyperLogLog.builder().build(); +hll.addLong(2); +hll.addLong(3); +hll.addLong(3); +hll.addLong(4); +bitVectors[0] = hll.serialize(); + } + + @Aft
[03/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out -- diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index b5f4fee..b6aedc4 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -89,18 +89,20 @@ PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 3 0.75 2 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +state string 0 3 0.75 2 SExM4AMDgaTbFcD8mOYCwMOJoQQ= + from deserializer PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state PREHOOK: type: DESCTABLE PREHOOK: Input: default@loc_orc_1d POSTHOOK: query: describe formatted loc_orc_1d PARTITION(year='2002') state POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@loc_orc_1d -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -state string 0 6 3.0 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +state string 0 6 3.0 3 SExM4AYGhJ2RPL68foHA90C/kJJjgJX39QKAwfg7 + from deserializer PREHOOK: query: explain extended select state from loc_orc_1d PREHOOK: type: QUERY POSTHOOK: query: explain extended select state from loc_orc_1d @@ -296,12 +298,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 
1700 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 -Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL +Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -499,12 +501,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false
[07/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_update_status.q.out -- diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out index 9cd9a8d..f23ba57 100644 --- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -46,10 +46,12 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted src_stat key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -keystring 0 16 1.72 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +keystring 0 16 1.72 3 SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='','avgColLen'='1.111') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='','avgColLen'='1.111') @@ -60,10 +62,12 @@ PREHOOK: Input: default@src_stat POSTHOOK: query: describe formatted src_stat key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_stat -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -keystring 0 1.111 3 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +keystring 0 1.111 3 
SExM4BAQgaTbFYDg/BOBktRrv6WcHMDErSCAotAegLu2HIHRunr/p8hhwLbpSsD22ZgBgdS9KMHV +myn/wfcugOGjfsCYzA0= + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') PREHOOK: type: ALTERTABL
[01/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/master 892841a46 -> f8b79fe6d http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/rename_external_partition_location.q.out -- diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out index 19546c3..893aea3 100644 --- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out +++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out @@ -162,18 +162,20 @@ PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -keyint 0 9 0 6 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +keyint 0 9 0 6 SExM4AYGxdOOGLy91N8BwJKLAcGuwk7AqvwN/4Sz5AE= + from deserializer PREHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED ex_table PARTITION (part='part1') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 6 5.0 5 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 6 5.0 5 SExM4AYGwZXdyQGC2MSsAcCIiJQBvtSupwHDnsmSAr36nzs= + from deserializer PREHOOK: query: ALTER TABLE ex_table PARTITION (part='part1') RENAME TO PARTITION (part='part2') PREHOOK: type: ALTERTABLE_RENAMEPART PREHOOK: Input: default@ex_table @@ -310,15 +312,17 @@ PREHOOK: Input: default@ex_table POSTHOOK: query: DESCRIBE FORMATTED 
ex_table PARTITION (part='part2') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ex_table -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -keyint 0 9 0 6 from deserializer +# col_name data_type min max num_nulls
[05/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/colstats_all_nulls.q.out -- diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out index 0f28225..ca1ec00 100644 --- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out +++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out @@ -41,20 +41,22 @@ PREHOOK: Input: default@all_nulls POSTHOOK: query: describe formatted all_nulls a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -a bigint 0 0 5 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +a bigint 0 0 5 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: describe formatted all_nulls b PREHOOK: type: DESCTABLE PREHOOK: Input: default@all_nulls POSTHOOK: query: describe formatted all_nulls b POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -b double 0.0 0.0 5 0 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +b double 0.0 0.0 5 1 SExM4AEA + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: drop table all_nulls PREHOOK: type: DROPTABLE PREHOOK: Input: default@all_nulls 
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out -- diff --git a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out index fb833bc..74085bf 100644 --- a/ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces
[08/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_column_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out index 96dce1e..3676204 100644 --- a/ql/src/test/results/clientpositive/alter_table_column_stats.q.out +++ b/ql/src/test/results/clientpositive/alter_table_column_stats.q.out @@ -123,30 +123,33 @@ PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col1 int 27 484 0 10 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col1 int 27 484 0 10 SExM4AoKwtK/OYCi1Bu/sMcRx9SoWfnl+S+BsIeDAb/T4wfChbuXAf+34EDBsoy/AQ== + from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -col2 string 0 10 6.7 7 from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +col2 string 0 10 6.7 7 SExM4AoKge76QcHLqCHAtN4dv5LMNcG35k7A+Oe9Af+Nq6wCgoTFpgG+39Yigri1AQ== + from 
deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} PREHOOK: query: describe formatted statsdb1.testtable0 col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 POSTHOOK: query: describe formatted statsdb1.testtable0 col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testtable0 -# col_name data_type min max num_nulls distinct_count avg_col_len
[10/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java -- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java new file mode 100644 index 000..6fae3e5 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java @@ -0,0 +1,358 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.aggr; + +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Date; +import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DateColumnStatsAggregator extends ColumnStatsAggregator implements +IExtrapolatePartStatus { + + private static final Logger LOG = LoggerFactory.getLogger(DateColumnStatsAggregator.class); + + @Override + public ColumnStatisticsObj aggregate(String colName, List partNames, + List css) throws MetaException { +ColumnStatisticsObj statsObj = null; + +// check if all the ColumnStatisticsObjs contain stats and all the ndv are +// bitvectors +boolean doAllPartitionContainStats = partNames.size() == css.size(); +LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); +NumDistinctValueEstimator ndvEstimator = null; +String colType = null; +for (ColumnStatistics cs : css) { + if (cs.getStatsObjSize() != 1) { +throw new MetaException( +"The number of columns should be exactly one in aggrStats, but found " ++ cs.getStatsObjSize()); + } + ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); + if (statsObj == null) { +colType = cso.getColType(); +statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso +.getStatsData().getSetField()); + } + if (!cso.getStatsData().getDateStats().isSetBitVectors() 
+ || cso.getStatsData().getDateStats().getBitVectors().length() == 0) { +ndvEstimator = null; +break; + } else { +// check if all of the bit vectors can merge +NumDistinctValueEstimator estimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(cso.getStatsData().getDateStats().getBitVectors()); +if (ndvEstimator == null) { + ndvEstimator = estimator; +} else { + if (ndvEstimator.canMerge(estimator)) { +continue; + } else { +ndvEstimator = null; +break; + } +} + } +} +if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory + .getEmptyNumDistinctValueEstimator(ndvEstimator); +} +LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); +ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); +if (doAllPartitionContainStats || css.size() < 2) { + DateColumnStatsData aggregateData = null; + long lowerBound = 0; + long higherBound = 0; + double densityAvgSum = 0.0; + for (ColumnStatistics cs : css) { +ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); +DateColumnStatsData newData = cso.getStatsData().getDateStats(); +lowerBound = Math.max(lowe
[02/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out index 5e64743..20e59a3 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out @@ -80,36 +80,40 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -keyint 27 484 0 20 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +keyint 27 484 0 20 SExM4BQUwv+PD4DTryqAhvQHgJzgE8DtpQL/wqEPx9SoWfm94QmAqJgmgbCHgwG/0+MHwJKHG4De +jWeClaYVvr3WP8H6iQHDxpxl/uvvWb7ssz2AsKk6 + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 20 6.8 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 20 6.8 7 SExM4BQUwcbsKcCnjhjArvYEgZ2yHMC03h2A4LIS/4KtDMCv7BbAz6JGgejDCP+AlzSA84UvwYTL +WsDygtQBv5uoWIKExaYBvt/WIoK4tQG/vpwPv72sGg== + from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, 
part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment - -keyint from deserializer +# col_name data_type comment
[06/11] hive git commit: HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out -- diff --git a/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out new file mode 100644 index 000..1dcc1fc --- /dev/null +++ b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out @@ -0,0 +1,708 @@ +PREHOOK: query: create table src_stat as select * from src1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src1 +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat +POSTHOOK: query: create table src_stat as select * from src1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat +POSTHOOK: Lineage: src_stat.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table src_stat_int ( + key double, + value string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat_int +POSTHOOK: query: create table src_stat_int ( + key double, + value string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_int +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@src_stat_int +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@src_stat_int +PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat + A masked pattern was here 
+POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat + A masked pattern was here +PREHOOK: query: describe formatted src_stat key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted src_stat key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +keystring 0 16 1.72 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='','avgColLen'='1.111') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='','avgColLen'='1.111') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted src_stat key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted src_stat key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +keystring 0 1.111 3 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET
[2/2] hive git commit: HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) (ADDENDUM)
HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) ADDENDUM) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/892841a4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/892841a4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/892841a4 Branch: refs/heads/master Commit: 892841a46e3d3026d075b0af85c82196d7bf7f40 Parents: 20276d2 Author: sergey Authored: Tue Jul 25 13:56:27 2017 -0700 Committer: sergey Committed: Tue Jul 25 13:56:49 2017 -0700 -- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/892841a4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index cf80a6c..dd9ad71 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3003,7 +3003,7 @@ public class HiveConf extends Configuration { LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false), "The buffer size for a per-fragment LLAP debug trace. 0 to disable."), -LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", true, // TODO# +LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", false, "Whether to always dump the LLAP IO trace (if enabled); the default is on error."), LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", true, "Whether the LLAP IO layer is enabled for non-vectorized queries that read inputs\n" +
[1/2] hive git commit: HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) (ADDENDUM)
Repository: hive Updated Branches: refs/heads/branch-2 288b5bfe6 -> 726f270a6 refs/heads/master 20276d211 -> 892841a46 HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) ADDENDUM) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/726f270a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/726f270a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/726f270a Branch: refs/heads/branch-2 Commit: 726f270a6e5c720a98ac58f2c4a549e70b45fbad Parents: 288b5bf Author: sergey Authored: Tue Jul 25 13:56:27 2017 -0700 Committer: sergey Committed: Tue Jul 25 13:56:27 2017 -0700 -- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/726f270a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 91c2552..30506b0 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2900,7 +2900,7 @@ public class HiveConf extends Configuration { LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false), "The buffer size for a per-fragment LLAP debug trace. 0 to disable."), -LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", true, // TODO# +LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", false, "Whether to always dump the LLAP IO trace (if enabled); the default is on error."), LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", true, "Whether the LLAP IO layer is enabled for non-vectorized queries that read inputs\n" +
hive git commit: HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)
Repository: hive Updated Branches: refs/heads/branch-2 c37fdf96e -> 288b5bfe6 HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/288b5bfe Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/288b5bfe Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/288b5bfe Branch: refs/heads/branch-2 Commit: 288b5bfe62405d93513625c2732c7ef5f80c9d43 Parents: c37fdf9 Author: sergey Authored: Tue Jul 25 12:50:25 2017 -0700 Committer: sergey Committed: Tue Jul 25 12:50:25 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 5 + .../hive/llap/io/api/impl/LlapIoImpl.java | 7 +- .../llap/io/decode/EncodedDataConsumer.java | 1 + .../io/decode/GenericColumnVectorProducer.java | 9 +- .../llap/io/decode/OrcColumnVectorProducer.java | 15 +- .../llap/io/decode/OrcEncodedDataConsumer.java | 12 + .../llap/io/encoded/OrcEncodedDataReader.java | 57 ++- .../ql/io/orc/encoded/EncodedReaderImpl.java| 102 ++-- .../hadoop/hive/ql/io/orc/encoded/IoTrace.java | 492 +++ .../hadoop/hive/ql/io/orc/encoded/Reader.java | 2 +- .../hive/ql/io/orc/encoded/ReaderImpl.java | 6 +- .../io/orc/encoded/TestEncodedReaderImpl.java | 14 +- 12 files changed, 655 insertions(+), 67 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/288b5bfe/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 365e3e7..91c2552 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2897,6 +2897,11 @@ public class HiveConf extends Configuration { -1f, "The customized fraction of JVM memory which Tez will reserve for the processor"), // The default is different on the client and server, so it's null here. 
LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."), +LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", +new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false), +"The buffer size for a per-fragment LLAP debug trace. 0 to disable."), +LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", true, // TODO# +"Whether to always dump the LLAP IO trace (if enabled); the default is on error."), LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", true, "Whether the LLAP IO layer is enabled for non-vectorized queries that read inputs\n" + "that can be vectorized"), http://git-wip-us.apache.org/repos/asf/hive/blob/288b5bfe/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java index 5a86114..daf7101 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java @@ -61,10 +61,12 @@ import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; import org.apache.hadoop.hive.llap.metrics.MetricsUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.encoded.IoTrace; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.metrics2.util.MBeans; +import org.apache.hive.common.util.FixedSizedObjectPool; import com.google.common.primitives.Ints; import com.google.common.util.concurrent.ListeningExecutorService; @@ -183,10 +185,11 @@ public class LlapIoImpl implements LlapIo { new LinkedBlockingQueue(), new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build()); // 
TODO: this should depends on input format and be in a map, or something. +FixedSizedObjectPool tracePool = IoTrace.createTracePool(conf); this.orcCvp = new OrcColumnVectorProducer( -metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics); +metadataCache, cache, bufferManager, conf, cacheMetrics, ioMetrics, tracePool); this.genericCvp = isEncodeEnabled ? new GenericColumnVectorProducer( -serdeCache, bufferManager, conf, cacheMetrics, ioMetrics) : null; +serdeCache, bufferManager, conf, c
hive git commit: HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)
Repository: hive Updated Branches: refs/heads/master 4af249581 -> 20276d211 HIVE-16954 : LLAP IO: better debugging (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/20276d21 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/20276d21 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/20276d21 Branch: refs/heads/master Commit: 20276d2113f669a2ea08480ce76df9bd6b913d09 Parents: 4af2495 Author: sergey Authored: Tue Jul 25 12:24:54 2017 -0700 Committer: sergey Committed: Tue Jul 25 12:24:54 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 5 + .../hive/llap/io/api/impl/LlapIoImpl.java | 8 +- .../llap/io/decode/EncodedDataConsumer.java | 1 + .../io/decode/GenericColumnVectorProducer.java | 9 +- .../llap/io/decode/OrcColumnVectorProducer.java | 15 +- .../llap/io/decode/OrcEncodedDataConsumer.java | 12 + .../llap/io/encoded/OrcEncodedDataReader.java | 56 ++- .../ql/io/orc/encoded/EncodedReaderImpl.java| 102 ++-- .../hadoop/hive/ql/io/orc/encoded/IoTrace.java | 478 +++ .../hadoop/hive/ql/io/orc/encoded/Reader.java | 2 +- .../hive/ql/io/orc/encoded/ReaderImpl.java | 6 +- .../io/orc/encoded/TestEncodedReaderImpl.java | 14 +- 12 files changed, 641 insertions(+), 67 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/20276d21/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f5e5974..cf80a6c 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3000,6 +3000,11 @@ public class HiveConf extends Configuration { -1f, "The customized fraction of JVM memory which Tez will reserve for the processor"), // The default is different on the client and server, so it's null here. 
LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."), +LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", +new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false), +"The buffer size for a per-fragment LLAP debug trace. 0 to disable."), +LLAP_IO_TRACE_ALWAYS_DUMP("hive.llap.io.trace.always.dump", true, // TODO# +"Whether to always dump the LLAP IO trace (if enabled); the default is on error."), LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", true, "Whether the LLAP IO layer is enabled for non-vectorized queries that read inputs\n" + "that can be vectorized"), http://git-wip-us.apache.org/repos/asf/hive/blob/20276d21/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java index 53c9bae..35b9d1f 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java @@ -58,11 +58,14 @@ import org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics; import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; import org.apache.hadoop.hive.llap.metrics.MetricsUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.io.orc.encoded.IoTrace; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.metrics2.util.MBeans; +import org.apache.hive.common.util.FixedSizedObjectPool; + import com.google.common.primitives.Ints; import com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -184,11 +187,12 @@ public class LlapIoImpl implements LlapIo { 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue(), new 
ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build()); +FixedSizedObjectPool tracePool = IoTrace.createTracePool(conf); // TODO: this should depends on input format and be in a map, or something. this.orcCvp = new OrcColumnVectorProducer( -metadataCache, cache, bufferManagerOrc, conf, cacheMetrics, ioMetrics); +metadataCache, cache, bufferManagerOrc, conf, cacheMetrics, ioMetrics, tracePool); this.genericCvp = isEncodeEnabled ? new GenericColumnVectorProducer( -serde
svn commit: r1015963 - in /websites/production/hive/content/javadocs/r2.2.0: ./ api/ api/org/ api/org/apache/ api/org/apache/hadoop/ api/org/apache/hadoop/fs/ api/org/apache/hadoop/fs/class-use/ api/o
Author: omalley Date: Tue Jul 25 19:20:24 2017 New Revision: 1015963 Log: Adding javadoc for 2.2.0 [This commit notification would consist of 4097 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
hive git commit: Preparing for 2.2.1 development.
Repository: hive Updated Branches: refs/heads/branch-2.2 da840b0f8 -> 1ed1f2807 Preparing for 2.2.1 development. Signed-off-by: Owen O'Malley Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ed1f280 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ed1f280 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ed1f280 Branch: refs/heads/branch-2.2 Commit: 1ed1f2807d411bf14357750adf4a57d6d7bfb62a Parents: da840b0 Author: Owen O'Malley Authored: Tue Jul 25 10:46:35 2017 -0700 Committer: Owen O'Malley Committed: Tue Jul 25 10:46:35 2017 -0700 -- accumulo-handler/pom.xml| 2 +- ant/pom.xml | 2 +- beeline/pom.xml | 2 +- cli/pom.xml | 2 +- common/pom.xml | 2 +- contrib/pom.xml | 2 +- druid-handler/pom.xml | 2 +- hbase-handler/pom.xml | 2 +- hcatalog/core/pom.xml | 2 +- hcatalog/hcatalog-pig-adapter/pom.xml | 2 +- hcatalog/pom.xml| 2 +- hcatalog/server-extensions/pom.xml | 2 +- hcatalog/streaming/pom.xml | 2 +- hcatalog/webhcat/java-client/pom.xml| 2 +- hcatalog/webhcat/svr/pom.xml| 2 +- hplsql/pom.xml | 2 +- hwi/pom.xml | 2 +- itests/custom-serde/pom.xml | 2 +- itests/custom-udfs/pom.xml | 2 +- itests/custom-udfs/udf-classloader-udf1/pom.xml | 2 +- itests/custom-udfs/udf-classloader-udf2/pom.xml | 2 +- itests/custom-udfs/udf-classloader-util/pom.xml | 2 +- itests/hcatalog-unit/pom.xml| 2 +- itests/hive-jmh/pom.xml | 2 +- itests/hive-minikdc/pom.xml | 2 +- itests/hive-unit-hadoop2/pom.xml| 2 +- itests/hive-unit/pom.xml| 2 +- itests/pom.xml | 2 +- itests/qtest-accumulo/pom.xml | 2 +- itests/qtest-spark/pom.xml | 2 +- itests/qtest/pom.xml| 2 +- itests/test-serde/pom.xml | 2 +- itests/util/pom.xml | 2 +- jdbc/pom.xml| 2 +- llap-client/pom.xml | 2 +- llap-common/pom.xml | 2 +- llap-ext-client/pom.xml | 2 +- llap-server/pom.xml | 2 +- llap-tez/pom.xml| 2 +- metastore/pom.xml | 2 +- orc/pom.xml | 2 +- packaging/pom.xml | 2 +- pom.xml | 2 +- ql/pom.xml | 2 +- serde/pom.xml | 2 +- service-rpc/pom.xml | 2 
+- service/pom.xml | 2 +- shims/0.23/pom.xml | 2 +- shims/aggregator/pom.xml| 2 +- shims/common/pom.xml| 2 +- shims/pom.xml | 2 +- shims/scheduler/pom.xml | 2 +- spark-client/pom.xml| 4 ++-- testutils/pom.xml | 2 +- 54 files changed, 55 insertions(+), 55 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1ed1f280/accumulo-handler/pom.xml -- diff --git a/accumulo-handler/pom.xml b/accumulo-handler/pom.xml index 210f441..6105ad8 100644 --- a/accumulo-handler/pom.xml +++ b/accumulo-handler/pom.xml @@ -19,7 +19,7 @@ org.apache.hive hive -2.2.0 +2.2.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/hive/blob/1ed1f280/ant/pom.xml -- diff --git a/ant/pom.xml b/ant/pom.xml index 3236797..3140b82 100644 --- a/ant/pom.xml +++ b/ant/pom.xml @@ -19,7 +19,7 @@ org.apache.hive hive -2.2.0 +2.2.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/hive/blob/1ed1f280/beeline/pom.xml -- diff --git a/beeline/pom.xml b/beeline/pom.xml index 20483f7..667a152 100644 --- a/beeline/pom.xml +++ b/beeline/pom.xml @@ -19,7 +19,7 @@ org.apache.hive hive -2.2.0 +2.2.1-SNAPSHOT ../pom.xml
svn commit: r20629 - in /release/hive/hive-2.2.0: ./ apache-hive-2.2.0-bin.tar.gz apache-hive-2.2.0-bin.tar.gz.asc apache-hive-2.2.0-bin.tar.gz.sha256 apache-hive-2.2.0-src.tar.gz apache-hive-2.2.0-sr
Author: omalley Date: Tue Jul 25 17:30:33 2017 New Revision: 20629 Log: Apache Hive 2.2.0 release Added: release/hive/hive-2.2.0/ release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz (with props) release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.asc (with props) release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.sha256 release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz (with props) release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.asc (with props) release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.sha256 Added: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz == Binary file - no diff available. Propchange: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz -- svn:mime-type = application/x-gzip Added: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.asc == Binary file - no diff available. Propchange: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.asc -- svn:mime-type = application/pgp-signature Added: release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.sha256 == --- release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.sha256 (added) +++ release/hive/hive-2.2.0/apache-hive-2.2.0-bin.tar.gz.sha256 Tue Jul 25 17:30:33 2017 @@ -0,0 +1 @@ +ad75ce71191760c931c3b814120ba1c8b7fe46399bb5a173cf05560e32f7458f apache-hive-2.2.0-bin.tar.gz Added: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz == Binary file - no diff available. Propchange: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz -- svn:mime-type = application/x-gzip Added: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.asc == Binary file - no diff available. 
Propchange: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.asc -- svn:mime-type = application/pgp-signature Added: release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.sha256 == --- release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.sha256 (added) +++ release/hive/hive-2.2.0/apache-hive-2.2.0-src.tar.gz.sha256 Tue Jul 25 17:30:33 2017 @@ -0,0 +1 @@ +8e086119a8076a60d08026ef4f0c7cb878814425417596ddd3f301a12ec8238c hive-2.2.0rc1.tar.gz
hive git commit: HIVE-17155: findConfFile() in HiveConf.java has some issues with the conf path (Aihua Xu, reviewed by Yongzhi Chen)
Repository: hive Updated Branches: refs/heads/master 3c7fb2a97 -> 4af249581 HIVE-17155: findConfFile() in HiveConf.java has some issues with the conf path (Aihua Xu, reviewed by Yongzhi Chen) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4af24958 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4af24958 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4af24958 Branch: refs/heads/master Commit: 4af249581a4a25bd92c9c2dd9a09e590a0cf2831 Parents: 3c7fb2a Author: Aihua Xu Authored: Tue Jul 25 08:14:40 2017 -0700 Committer: Aihua Xu Committed: Tue Jul 25 08:14:40 2017 -0700 -- .../src/java/org/apache/hadoop/hive/conf/HiveConf.java | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/4af24958/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3cf76d0..f5e5974 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -161,19 +161,25 @@ public class HiveConf extends Configuration { result = checkConfigFile(new File(confPath, name)); if (result == null) { String homePath = System.getenv("HIVE_HOME"); -String nameInConf = "conf" + File.pathSeparator + name; +String nameInConf = "conf" + File.separator + name; result = checkConfigFile(new File(homePath, nameInConf)); if (result == null) { URI jarUri = null; try { -jarUri = HiveConf.class.getProtectionDomain().getCodeSource().getLocation().toURI(); +// Handle both file:// and jar:!{entry} in the case of shaded hive libs +URL sourceUrl = HiveConf.class.getProtectionDomain().getCodeSource().getLocation(); +jarUri = sourceUrl.getProtocol().equalsIgnoreCase("jar") ? 
new URI(sourceUrl.getPath()) : sourceUrl.toURI(); } catch (Throwable e) { if (l4j.isInfoEnabled()) { l4j.info("Cannot get jar URI", e); } System.err.println("Cannot get jar URI: " + e.getMessage()); } - result = checkConfigFile(new File(new File(jarUri).getParentFile(), nameInConf)); + // From the jar file, the parent is /lib folder + File parent = new File(jarUri).getParentFile(); + if (parent != null) { +result = checkConfigFile(new File(parent.getParentFile(), nameInConf)); + } } } }
[hive] Git Push Summary
Repository: hive Updated Tags: refs/tags/rel/release-2.2.0 [created] a585c2718
[hive] Git Push Summary
Repository: hive Updated Tags: refs/tags/release-2.2.0rc1 [deleted] da840b0f8
[hive] Git Push Summary
Repository: hive Updated Tags: refs/tags/release-2.3.0-rc1 [deleted] ac7a42c13
[hive] Git Push Summary
Repository: hive Updated Tags: refs/tags/release-2.3.0-rc0 [deleted] 498721024
[hive] Git Push Summary
Repository: hive Updated Tags: refs/tags/storage-release-2.4.0rc0 [deleted] 63d7b4b5f
[hive] Git Push Summary
Repository: hive Updated Tags: refs/tags/storage-release-2.4.0rc1 [deleted] 4b354124a
[hive] Git Push Summary
Repository: hive Updated Tags: refs/tags/storage-release-2.3.0rc0 [deleted] 30e6745a6
[2/3] hive git commit: HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 60400de..99a4958 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -83,7 +83,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { } } - public static enum OperatorVariation { + public static enum VectorMapJoinVariation { NONE, INNER_BIG_ONLY, INNER, @@ -94,7 +94,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { private HashTableImplementationType hashTableImplementationType; private HashTableKind hashTableKind; private HashTableKeyType hashTableKeyType; - private OperatorVariation operatorVariation; + private VectorMapJoinVariation vectorMapJoinVariation; private boolean minMaxEnabled; private VectorMapJoinInfo vectorMapJoinInfo; @@ -103,7 +103,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { hashTableImplementationType = HashTableImplementationType.NONE; hashTableKind = HashTableKind.NONE; hashTableKeyType = HashTableKeyType.NONE; -operatorVariation = OperatorVariation.NONE; +vectorMapJoinVariation = VectorMapJoinVariation.NONE; minMaxEnabled = false; vectorMapJoinInfo = null; } @@ -114,7 +114,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { clone.hashTableImplementationType = this.hashTableImplementationType; clone.hashTableKind = this.hashTableKind; clone.hashTableKeyType = this.hashTableKeyType; -clone.operatorVariation = this.operatorVariation; +clone.vectorMapJoinVariation = this.vectorMapJoinVariation; clone.minMaxEnabled = this.minMaxEnabled; if (vectorMapJoinInfo != null) { throw new RuntimeException("Cloning VectorMapJoinInfo not supported"); @@ -122,7 +122,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { 
return clone; } - public HashTableImplementationType hashTableImplementationType() { + public HashTableImplementationType getHashTableImplementationType() { return hashTableImplementationType; } @@ -130,7 +130,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { this.hashTableImplementationType = hashTableImplementationType; } - public HashTableKind hashTableKind() { + public HashTableKind getHashTableKind() { return hashTableKind; } @@ -138,7 +138,7 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { this.hashTableKind = hashTableKind; } - public HashTableKeyType hashTableKeyType() { + public HashTableKeyType getHashTableKeyType() { return hashTableKeyType; } @@ -146,15 +146,15 @@ public class VectorMapJoinDesc extends AbstractVectorDesc { this.hashTableKeyType = hashTableKeyType; } - public OperatorVariation operatorVariation() { -return operatorVariation; + public VectorMapJoinVariation getVectorMapJoinVariation() { +return vectorMapJoinVariation; } - public void setOperatorVariation(OperatorVariation operatorVariation) { -this.operatorVariation = operatorVariation; + public void setVectorMapJoinVariation(VectorMapJoinVariation vectorMapJoinVariation) { +this.vectorMapJoinVariation = vectorMapJoinVariation; } - public boolean minMaxEnabled() { + public boolean getMinMaxEnabled() { return minMaxEnabled; } http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/test/org/apache/hadoop/hive/ql/exec/util/DescriptionTest.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/util/DescriptionTest.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/util/DescriptionTest.java new file mode 100644 index 000..bbdd4a3 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/util/DescriptionTest.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
[1/3] hive git commit: HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master 88da23829 -> 3c7fb2a97 http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java new file mode 100644 index 000..84103ec --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -0,0 +1,549 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; +import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import 
org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.ap
[3/3] hive git commit: HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran)
HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3c7fb2a9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3c7fb2a9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3c7fb2a9 Branch: refs/heads/master Commit: 3c7fb2a976ad363a2f96588d0c4880f682f0b855 Parents: 88da238 Author: Prasanth Jayachandran Authored: Tue Jul 25 02:49:01 2017 -0700 Committer: Prasanth Jayachandran Committed: Tue Jul 25 02:49:01 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 + itests/hive-jmh/pom.xml | 6 + .../vectorization/mapjoin/AbstractMapJoin.java | 179 + .../mapjoin/MapJoinMultiKeyBench.java | 313 + .../mapjoin/MapJoinMultiKeyBenchBase.java | 68 ++ .../mapjoin/MapJoinOneLongKeyBench.java | 313 + .../mapjoin/MapJoinOneLongKeyBenchBase.java | 66 ++ .../mapjoin/MapJoinOneStringKeyBench.java | 313 + .../mapjoin/MapJoinOneStringKeyBenchBase.java | 66 ++ .../hadoop/hive/ql/exec/MapJoinOperator.java| 18 + .../hive/ql/exec/spark/HashTableLoader.java | 2 +- .../hive/ql/exec/vector/VectorBatchDebug.java | 105 +++ .../mapjoin/VectorMapJoinCommonOperator.java| 35 +- .../fast/VectorMapJoinFastTableContainer.java | 8 +- .../VectorMapJoinOptimizedCreateHashTable.java | 6 +- .../hive/ql/optimizer/physical/Vectorizer.java | 26 +- .../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 6 +- .../hadoop/hive/ql/plan/VectorMapJoinDesc.java | 24 +- .../hive/ql/exec/util/DescriptionTest.java | 31 + .../CollectorTestOperator.java | 47 ++ .../CountCollectorTestOperator.java | 48 ++ .../CountVectorCollectorTestOperator.java | 47 ++ .../RowCollectorTestOperator.java | 52 ++ .../RowCollectorTestOperatorBase.java | 32 + .../RowVectorCollectorTestOperator.java | 63 ++ .../ql/exec/util/rowobjects/RowTestObjects.java | 100 +++ .../exec/util/rowobjects/RowTestObjectsMap.java | 
77 +++ .../util/rowobjects/RowTestObjectsMultiSet.java | 91 +++ .../exec/vector/mapjoin/MapJoinTestConfig.java | 658 +++ .../ql/exec/vector/mapjoin/MapJoinTestData.java | 272 .../vector/mapjoin/MapJoinTestDescription.java | 239 +++ .../vector/mapjoin/TestMapJoinOperator.java | 549 .../util/batchgen/TestVectorBatchGenerate.java | 57 ++ .../batchgen/VectorBatchGenerateStream.java | 63 ++ .../util/batchgen/VectorBatchGenerateUtil.java | 92 +++ .../util/batchgen/VectorBatchGenerator.java | 306 + .../batchgen/VectorColumnGroupGenerator.java| 494 ++ 37 files changed, 4834 insertions(+), 40 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3ebe503..3cf76d0 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -560,6 +560,8 @@ public class HiveConf extends Configuration { HIVE_IN_TEZ_TEST("hive.in.tez.test", false, "internal use only, true when in testing tez", true), + HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD("hive.mapjoin.testing.no.hash.table.load", false, "internal use only, true when in testing map join", +true), LOCALMODEAUTO("hive.exec.mode.local.auto", false, "Let Hive determine whether to run in local mode automatically"), http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/pom.xml -- diff --git a/itests/hive-jmh/pom.xml b/itests/hive-jmh/pom.xml index af8eb19..0ff584c 100644 --- a/itests/hive-jmh/pom.xml +++ b/itests/hive-jmh/pom.xml @@ -56,7 +56,13 @@ org.apache.hive hive-exec + ${project.version} + + + org.apache.hive + hive-exec ${project.version} + tests org.apache.hadoop http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java -- diff 
--git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/Abstra