Repository: hive Updated Branches: refs/heads/branch-3 5a9f7caf7 -> 2e02896e9
HIVE-19578: HLL merges tempList on every add (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2e02896e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2e02896e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2e02896e Branch: refs/heads/branch-3 Commit: 2e02896e9b93607dabd861c41b31e291b65ef5f4 Parents: 5a9f7ca Author: Prasanth Jayachandran <prasan...@apache.org> Authored: Fri May 25 11:18:57 2018 -0700 Committer: Prasanth Jayachandran <prasan...@apache.org> Committed: Fri May 25 11:36:04 2018 -0700 ---------------------------------------------------------------------- .../llap/hybridgrace_hashjoin_1.q.out | 12 ++++----- .../clientpositive/llap/limit_pushdown.q.out | 6 ++--- .../clientpositive/llap/limit_pushdown3.q.out | 10 ++++---- .../llap/llap_vector_nohybridgrace.q.out | 4 +-- .../clientpositive/llap/mapjoin_decimal.q.out | 8 +++--- .../llap/offset_limit_ppd_optimizer.q.out | 6 ++--- .../clientpositive/llap/semijoin_hint.q.out | 18 +++++++------- .../llap/tez_dynpart_hashjoin_2.q.out | 24 +++++++++--------- .../llap/tez_vector_dynpart_hashjoin_2.q.out | 24 +++++++++--------- .../llap/vector_left_outer_join.q.out | 4 +-- .../llap/vectorization_limit.q.out | 10 ++++---- .../llap/vectorization_short_regress.q.out | 26 ++++++++++---------- .../llap/vectorized_context.q.out | 6 ++--- .../llap/vectorized_distinct_gby.q.out | 8 +++--- .../llap/vectorized_mapjoin.q.out | 4 +-- .../llap/vectorized_nested_mapjoin.q.out | 2 +- .../llap/vectorized_shufflejoin.q.out | 4 +-- .../tez/hybridgrace_hashjoin_1.q.out | 12 ++++----- .../hive/common/ndv/hll/HLLSparseRegister.java | 26 ++++++-------------- .../hadoop/hive/common/ndv/hll/HyperLogLog.java | 2 +- 20 files changed, 102 insertions(+), 114 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out index b8df0d0..923b8fb 100644 --- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out @@ -56,7 +56,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -177,7 +177,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -296,7 +296,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 18694 Data size: 149552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18702 Data size: 149616 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -413,7 +413,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 18694 Data size: 149552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18702 Data size: 149616 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -527,7 +527,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -638,7 +638,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index 79311d0..76fae9a 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -399,12 +399,12 @@ STAGE PLANS: keys: cdouble (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs @@ -415,7 +415,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out index d6f624c..a41091c 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out @@ -428,12 +428,12 @@ STAGE PLANS: keys: cdouble (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs @@ -444,11 +444,11 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -456,7 +456,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out b/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 2c13d5d..6402848 100644 --- a/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -65,7 +65,7 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -244,7 +244,7 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out index 77d1bde..0bf0f94 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out @@ -112,11 +112,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1102 Data size: 246848 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(4,2)) sort order: + - Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1102 Data size: 246848 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(4,0)) Execution mode: llap LLAP IO: all inputs @@ -145,10 +145,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1102 Data size: 246848 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1102 Data size: 246848 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index 09a120a..6646027 100644 --- a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -403,12 +403,12 @@ STAGE PLANS: keys: cdouble (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: llap LLAP IO: all inputs @@ -419,7 +419,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5570 Data size: 33272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5567 Data size: 33256 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index f2cef7a..e6f2346 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -478,16 +478,16 @@ STAGE PLANS: 0 cstring (type: string) 1 value (type: string) outputColumnNames: str - Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4058 Data size: 353046 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) sort order: + Map-reduce partition columns: str (type: string) - Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4058 Data size: 353046 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4056 Data size: 705744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4058 Data size: 706092 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) mode: hash @@ -506,9 +506,9 @@ STAGE PLANS: keys: 0 str (type: string) 1 key1 (type: string) - Statistics: Num rows: 4461 Data size: 388159 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 4463 Data size: 388350 Basic stats: PARTIAL Column stats: NONE Select Operator - Statistics: Num rows: 4461 Data size: 388159 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 4463 Data size: 388350 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1476,16 +1476,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4058 Data size: 353046 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 4056 Data size: 352872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4058 Data size: 353046 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4056 Data size: 705744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4058 Data size: 706092 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) mode: hash @@ -1504,7 +1504,7 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 4461 Data size: 388159 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 4463 Data size: 388350 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out index ae9b37a..4ccfe1a 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out @@ -105,11 +105,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: vectorized, llap @@ -117,10 +117,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -296,11 +296,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: vectorized, llap @@ -308,10 +308,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -487,11 +487,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: vectorized, llap @@ -499,10 +499,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out index ae9b37a..4ccfe1a 100644 --- a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out @@ -105,11 +105,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: vectorized, llap @@ -117,10 +117,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -296,11 +296,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: vectorized, llap @@ -308,10 +308,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -487,11 +487,11 @@ STAGE PLANS: 1 UDFToInteger(_col0) (type: int) 2 (UDFToInteger(_col0) + 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: vectorized, llap @@ -499,10 +499,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1776 Data size: 312050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1777 Data size: 312360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out index dc8f47e..fe3b1e5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out @@ -51,7 +51,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 87720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 87772 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -60,7 +60,7 @@ STAGE PLANS: 1 _col0 (type: tinyint) input vertices: 1 Map 4 - Statistics: Num rows: 2423154 Data size: 19385232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2424412 Data size: 19395296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 7be4d7d..c569e68 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -897,7 +897,7 @@ STAGE PLANS: keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4157 Data size: 58088 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -908,7 +908,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4157 Data size: 58088 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -956,7 +956,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4157 Data size: 58088 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ @@ -966,7 +966,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4157 Data size: 58088 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -991,7 +991,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 0] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4157 Data size: 58088 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 7362b04..7325ac0 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -2207,7 +2207,7 @@ STAGE PLANS: keys: _col0 (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1141 Data size: 76436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1140 Data size: 76368 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + @@ -2216,7 +2216,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1141 Data size: 76436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1140 Data size: 76368 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2251,7 +2251,7 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1141 Data size: 76436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1140 Data size: 76368 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col4 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col4)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), ((_col5 - ((_col6 * _col6) / _col7)) / _col7) (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col8 (type: bigint), (_col8 - -89010L) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -2260,7 +2260,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 9, 10, 16, 4, 18, 19, 17, 14, 8, 20] selectExpressions: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 9:int, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 14:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 10:double) -> 11:double) -> 10:double, IfExprNullCondExpr(col 12:boolean, null, col 13:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 12:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 13:bigint) -> 14:bigint) -> 11:double) -> 10:double, DecimalScalarDivideDecimalColumn(val -1.389, col 15:decimal(5,0))(children: CastLongToDecimal(col 0:smallint) -> 15:decimal(5,0)) -> 16:decimal(10,9), DoubleColDivideDoubleColumn(col 11:double, col 17:double)(children: CastLongToDouble(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -7 5)(children: col 0:smallint) -> 14:int) -> 11:double, CastLongToDouble(col 4:bigint) -> 17:double) -> 18:double, LongColUnaryMinus(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 19:int, DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 17:double)(children: DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 11:double) -> 17:double) -> 11:double) -> 17:double, LongColUnaryMinus(col 20:int)(children: LongColUnaryMinus(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 20:int) -> 14:int, LongColSubtractLongScalar(col 8:bigint, val -89010) -> 20:bigint - Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1140 Data size: 199488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) sort order: +++++++++++ @@ -2268,7 +2268,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1140 Data size: 199488 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -2286,7 +2286,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1140 Data size: 199488 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -2484,7 +2484,7 @@ STAGE PLANS: keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1136 Data size: 52232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1135 Data size: 52192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -2493,7 +2493,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1136 Data size: 52232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1135 Data size: 52192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2528,7 +2528,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1136 Data size: 52232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1135 Data size: 52192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double), (2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), _col4 (type: bigint), ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D) (type: double), ((- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) * ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D)) (type: double), _col5 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (_col0 - (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THE N (null) ELSE ((_col3 - 1)) END))) (type: double), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (_col0 + ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (_col0 * 762.0D) (type: double), _col2 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2537,7 +2537,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 7, 6, 11, 4, 17, 20, 5, 23, 26, 14, 29, 30, 2, 34] selectExpressions: DoubleColDivideLongColumn(col 6:double, col 10:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 7:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 7:double) -> 6:double, IfExprNullCondExpr(col 8:boolean, null, col 9:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 8:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 9:bigint) -> 10:bigint) -> 7:double, DoubleScalarMultiplyDoubleColumn(val 2563.58, col 11:double)(children: DoubleColDivideLongColumn(col 6:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 11:double) -> 6:double, IfExprNullCondExpr(col 10:boolean, null, col 12:bigint)(children: LongColEqualLongScal ar(col 3:bigint, val 1) -> 10:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 11:double) -> 6:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 11:double) -> 14:double) -> 11:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 11:double, DoubleColAddDoubleScalar(col 14:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 17:double)(children: DoubleColDivideLongColumn(col 14:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 17:double)(children: DoubleColDivi deLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 17:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 14:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 14:double, col 23:double)(children: DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideLongColumn(col 14:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 20:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 20:double) -> 14:double, IfExprNullCondExpr(col 19:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 20:double) -> 14:double, DoubleColAddDoubleScalar(col 20:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 23:double)(children: DoubleColDivideLongColumn(col 20:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 20:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 20:double) -> 23:double) -> 20:double, IfExprNullCondExpr(col 22:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 20:double) -> 23:double) -> 20:double, DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:dou ble) -> 23:double) -> 14:double) -> 23:double, DoubleColSubtractDoubleColumn(col 0:double, col 14:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColDivideLongColumn(col 14:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 26:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 26:double) -> 14:double, IfExprNullCondExpr(col 25:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 14:double) -> 26:double, FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 29:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, co l 2:double) -> 14:double) -> 29:double) -> 14:double) -> 29:double) -> 14:double, DoubleColAddDoubleColumn(col 0:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 29:double) -> 30:double) -> 29:double, IfExprNullCondExpr(col 28:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 30:double) -> 29:double, DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 30:double, DoubleScalarModuloDoubleColumn(val -863.257, col 33:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 33:double) -> 34:double - Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1135 Data size: 142992 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -2545,7 +2545,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1135 Data size: 142992 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -2563,13 +2563,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] - Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1135 Data size: 142992 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1135 Data size: 142992 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/vectorized_context.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out index 8907c7f..bc1a203 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out @@ -133,7 +133,7 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 6124 Data size: 49032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6126 Data size: 49056 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -143,11 +143,11 @@ STAGE PLANS: outputColumnNames: _col2, _col5 input vertices: 1 Map 3 - Statistics: Num rows: 6173 Data size: 625962 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6177 Data size: 626386 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6173 Data size: 625962 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6177 Data size: 626386 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 10208 Basic stats: COMPLETE Column stats: COMPLETE http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index 9198ac2..05bf12c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -209,7 +209,7 @@ STAGE PLANS: keys: cint (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6027 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -220,7 +220,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6027 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -265,7 +265,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6027 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), UDFToDouble(_col0) (type: double), (UDFToDouble(_col0) * UDFToDouble(_col0)) (type: double) outputColumnNames: _col0, _col1, _col2 @@ -274,7 +274,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 4] selectExpressions: CastLongToDouble(col 0:int) -> 1:double, DoubleColMultiplyDoubleColumn(col 2:double, col 3:double)(children: CastLongToDouble(col 0:int) -> 2:double, CastLongToDouble(col 0:int) -> 3:double) -> 4:double - Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6027 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 8abd234..7bacad8 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -58,7 +58,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18702 Data size: 131024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -67,7 +67,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 2, 13] selectExpressions: LongColAddLongColumn(col 2:int, col 2:int) -> 13:int - Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18702 Data size: 131024 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), sum(_col2), count(_col2) Group By Vectorization: http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index a2ca7f8..8e3323b 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -54,7 +54,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 1452263 Data size: 11604232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1452959 Data size: 11609800 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index 498f9b9..776e86c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -118,11 +118,11 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18702 Data size: 131024 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18702 Data size: 131024 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), sum(_col2), count(_col2) mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out index db2d3c8..5b9149c 100644 --- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -56,7 +56,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -175,7 +175,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -293,7 +293,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 18694 Data size: 149552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18702 Data size: 149616 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -408,7 +408,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 18694 Data size: 149552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18702 Data size: 149616 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -521,7 +521,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -630,7 +630,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java index 82085dd..d62b858 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java @@ -148,12 +148,7 @@ public class HLLSparseRegister implements HLLRegister { } public int getSize() { - - // merge temp list before getting the size of sparse map - if (tempListIdx != 0) { - mergeTempListToSparseMap(); - } - return sparseMap.size(); + return sparseMap.size() + tempListIdx; } public void merge(HLLRegister hllRegister) { @@ -172,27 +167,20 @@ public class HLLSparseRegister implements HLLRegister { } public boolean set(int key, byte value) { - boolean updated = false; - // retain only the largest value for a register index - if (sparseMap.containsKey(key)) { - byte containedVal = sparseMap.get(key); - if (value > containedVal) { - sparseMap.put(key, value); - updated = true; - } - } else { + Byte containedValue = sparseMap.get(key); + if (containedValue == null || value > containedValue) { sparseMap.put(key, value); - updated = true; + return true; } - return updated; + return false; } public TreeMap<Integer,Byte> getSparseMap() { - return sparseMap; + return getMergedSparseMap(); } - public TreeMap<Integer,Byte> getMergedSparseMap() { + private TreeMap<Integer,Byte> getMergedSparseMap() { if (tempListIdx != 0) { mergeTempListToSparseMap(); } http://git-wip-us.apache.org/repos/asf/hive/blob/2e02896e/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java index 07a93c6..a3cc989 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java @@ -281,7 +281,7 @@ public class HyperLogLog implements NumDistinctValueEstimator { // if encoding is still SPARSE use linear counting with increase // accuracy (as we use pPrime bits for register index) int mPrime = 1 << sparseRegister.getPPrime(); - cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSize()); + cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSparseMap().size()); } else { // for DENSE encoding, use bias table lookup for HLLNoBias algorithm