HIVE-18005: Improve size estimation for array() to be not 0 (Zoltan Haindrich, reviewed by Vineet Garg)
Signed-off-by: Zoltan Haindrich <k...@rxd.hu> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f6312418 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f6312418 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f6312418 Branch: refs/heads/master Commit: f63124188ef0965d85ed0af315cab840d1e9af3f Parents: 63f2ec1 Author: Zoltan Haindrich <k...@rxd.hu> Authored: Tue Dec 5 10:48:38 2017 +0100 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Tue Dec 5 10:48:38 2017 +0100 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 21 +-- .../hadoop/hive/ql/stats/TestStatsUtils.java | 35 ++++- .../clientpositive/array_size_estimation.q | 16 ++ .../clientpositive/array_size_estimation.q.out | 155 +++++++++++++++++++ .../beeline/select_dummy_source.q.out | 14 +- .../clientpositive/lateral_view_onview.q.out | 40 ++--- .../clientpositive/lateral_view_onview2.q.out | 40 ++--- .../clientpositive/select_dummy_source.q.out | 14 +- .../test/results/clientpositive/udf_array.q.out | 2 +- .../results/clientpositive/udf_sort_array.q.out | 2 +- .../test/results/clientpositive/udf_split.q.out | 2 +- .../hive/common/type/HiveIntervalDayTime.java | 4 +- .../hadoop/hive/ql/util/JavaDataModel.java | 2 +- 13 files changed, 275 insertions(+), 72 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index e42614c..05c9380 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1234,10 +1234,6 @@ public class StatsUtils { /** * Get the size of complex data types - * @param conf - * - hive conf - * @param oi - * - object inspector * @return raw data size */ public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) { @@ -1271,7 +1267,7 @@ public class StatsUtils { // check if list elements are primitive or Objects ObjectInspector leoi = scloi.getListElementObjectInspector(); if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) { - result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length); + result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length, conf); } else { result += JavaDataModel.get().lengthForObjectArrayOfSize(length); } @@ -1373,13 +1369,9 @@ public class StatsUtils { /** * Get the size of arrays of primitive types - * @param colType - * - column type - * @param length - * - array length * @return raw data size */ - public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) { + public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length, HiveConf conf) { String colTypeLowerCase = colType.toLowerCase(); if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME) @@ -1396,12 +1388,21 @@ public class StatsUtils { } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) { return JavaDataModel.get().lengthForBooleanArrayOfSize(length); } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) || + colTypeLowerCase.equals(serdeConstants.DATETIME_TYPE_NAME) || + colTypeLowerCase.equals(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME) || + colTypeLowerCase.equals(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) { return JavaDataModel.get().lengthForTimestampArrayOfSize(length); } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) { return JavaDataModel.get().lengthForDateArrayOfSize(length); } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { return JavaDataModel.get().lengthForDecimalArrayOfSize(length); + } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) + || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME) + || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) { + int configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH); + int siz = JavaDataModel.get().lengthForStringOfLength(configVarLen); + return JavaDataModel.get().lengthForPrimitiveArrayOfSize(siz, length); } else { return 0; } http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java index eee9a31..9699bcc 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java @@ -18,11 +18,20 @@ package org.apache.hadoop.hive.ql.stats; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.Set; + +import org.apache.commons.lang.reflect.FieldUtils; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; +import org.apache.hadoop.hive.serde.serdeConstants; import org.junit.Test; +import org.spark_project.guava.collect.Sets; public class TestStatsUtils { @@ -47,7 +56,6 @@ public class TestStatsUtils { checkCombinedRange(false, new Range(11, 12), new Range(0, 10)); } - private void checkCombinedRange(boolean valid, Range r1, Range r2) { Range r3a = StatsUtils.combineRange(r1, r2); Range r3b = StatsUtils.combineRange(r2, r1); @@ -67,5 +75,30 @@ public class TestStatsUtils { return m <= v && v <= M; } + @Test + public void testPrimitiveSizeEstimations() throws Exception { + HiveConf conf = new HiveConf(); + Set<String> exclusions = Sets.newHashSet(); + exclusions.add(serdeConstants.VOID_TYPE_NAME); + exclusions.add(serdeConstants.LIST_TYPE_NAME); + exclusions.add(serdeConstants.MAP_TYPE_NAME); + exclusions.add(serdeConstants.STRUCT_TYPE_NAME); + exclusions.add(serdeConstants.UNION_TYPE_NAME); + Field[] serdeFields = serdeConstants.class.getFields(); + for (Field field : serdeFields) { + if (!Modifier.isStatic(field.getModifiers())) { + continue; + } + if (!field.getName().endsWith("_TYPE_NAME")) { + continue; + } + String typeName = (String) FieldUtils.readStaticField(field); + if (exclusions.contains(typeName)) { + continue; + } + long siz = StatsUtils.getSizeOfPrimitiveTypeArraysFromType(typeName, 3, conf); + assertNotEquals(field.toString(), 0, siz); + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/queries/clientpositive/array_size_estimation.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/array_size_estimation.q b/ql/src/test/queries/clientpositive/array_size_estimation.q new file mode 100644 index 0000000..74713c4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/array_size_estimation.q @@ -0,0 +1,16 @@ +set hive.stats.fetch.column.stats=true; + +create table t (col string); +insert into t values ('x'); + +explain +select array("b", "d", "c", "a") FROM t; + +explain +select array("b", "d", "c", col) FROM t; + +explain +select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t; + +explain +select sort_array(array("b", "d", "c", col)),array("1","2") FROM t; http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/array_size_estimation.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/array_size_estimation.q.out b/ql/src/test/results/clientpositive/array_size_estimation.q.out new file mode 100644 index 0000000..3cd205f --- /dev/null +++ b/ql/src/test/results/clientpositive/array_size_estimation.q.out @@ -0,0 +1,155 @@ +PREHOOK: query: create table t (col string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (col string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values ('x') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('x') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.col SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain +select array("b", "d", "c", "a") FROM t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select array("b", "d", "c", "a") FROM t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array('b','d','c','a') (type: array<string>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select array("b", "d", "c", col) FROM t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select array("b", "d", "c", col) FROM t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: array('b','d','c',col) (type: array<string>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sort_array(array('b','d','c','a')) (type: array<string>), array('1','2') (type: array<string>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select sort_array(array("b", "d", "c", col)),array("1","2") FROM t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sort_array(array("b", "d", "c", col)),array("1","2") FROM t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sort_array(array('b','d','c',col)) (type: array<string>), array('1','2') (type: array<string>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out b/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out index 0b73e84..b3ca623 100644 --- a/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out +++ b/ql/src/test/results/clientpositive/beeline/select_dummy_source.q.out @@ -85,17 +85,17 @@ STAGE PLANS: Select Operator expressions: array('a','b') (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Select Operator expressions: col (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -204,14 +204,14 @@ STAGE PLANS: Select Operator expressions: array('a','b') (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Select Operator expressions: col (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select explode(array('a', 'b')) http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/lateral_view_onview.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/lateral_view_onview.q.out b/ql/src/test/results/clientpositive/lateral_view_onview.q.out index 8bd36e2..f09b0bd 100644 --- a/ql/src/test/results/clientpositive/lateral_view_onview.q.out +++ b/ql/src/test/results/clientpositive/lateral_view_onview.q.out @@ -231,17 +231,17 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 9 - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,23 +249,23 @@ STAGE PLANS: Select Operator expressions: array('a','b','c') (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 9 - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -288,17 +288,17 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 9 - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -306,23 +306,23 @@ STAGE PLANS: Select Operator expressions: array('a','b','c') (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 9 - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/lateral_view_onview2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/lateral_view_onview2.q.out b/ql/src/test/results/clientpositive/lateral_view_onview2.q.out index 16813e0..aec90de 100644 --- a/ql/src/test/results/clientpositive/lateral_view_onview2.q.out +++ b/ql/src/test/results/clientpositive/lateral_view_onview2.q.out @@ -64,17 +64,17 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 9 - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -82,23 +82,23 @@ STAGE PLANS: Select Operator expressions: array('a','b','c') (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 9 - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -121,17 +121,17 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 9 - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -139,23 +139,23 @@ STAGE PLANS: Select Operator expressions: array('a','b','c') (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 592000 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 648000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 56000 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 9 - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 252 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/select_dummy_source.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/select_dummy_source.q.out b/ql/src/test/results/clientpositive/select_dummy_source.q.out index 0b73e84..b3ca623 100644 --- a/ql/src/test/results/clientpositive/select_dummy_source.q.out +++ b/ql/src/test/results/clientpositive/select_dummy_source.q.out @@ -85,17 +85,17 @@ STAGE PLANS: Select Operator expressions: array('a','b') (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Select Operator expressions: col (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -204,14 +204,14 @@ STAGE PLANS: Select Operator expressions: array('a','b') (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Select Operator expressions: col (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select explode(array('a', 'b')) http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/udf_array.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_array.q.out b/ql/src/test/results/clientpositive/udf_array.q.out index 1abb399..16aedb2 100644 --- a/ql/src/test/results/clientpositive/udf_array.q.out +++ b/ql/src/test/results/clientpositive/udf_array.q.out @@ -31,7 +31,7 @@ STAGE PLANS: Select Operator expressions: array() (type: array<string>), array()[1] (type: string), array(1,2,3) (type: array<int>), array(1,2,3)[2] (type: int), array(1,'a',2,3) (type: array<string>), array(1,'a',2,3)[2] (type: string), array(array(1),array(2),array(3),array(4))[1][0] (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 500 Data size: 216000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 624000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT array(), array()[1], array(1, 2, 3), array(1, 2, 3)[2], array(1,"a", 2, 3), array(1,"a", 2, 3)[2], http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/udf_sort_array.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_sort_array.q.out b/ql/src/test/results/clientpositive/udf_sort_array.q.out index 1e9dc85..f375e85 100644 --- a/ql/src/test/results/clientpositive/udf_sort_array.q.out +++ b/ql/src/test/results/clientpositive/udf_sort_array.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Select Operator expressions: sort_array(array('b','d','c','a')) (type: array<string>) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 388000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src tablesample (1 rows) http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/ql/src/test/results/clientpositive/udf_split.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_split.q.out b/ql/src/test/results/clientpositive/udf_split.q.out index d62cc61..c05a8dd 100644 --- a/ql/src/test/results/clientpositive/udf_split.q.out +++ b/ql/src/test/results/clientpositive/udf_split.q.out @@ -42,7 +42,7 @@ STAGE PLANS: Select Operator expressions: split('a b c', ' ') (type: array<string>), split('oneAtwoBthreeC', '[ABC]') (type: array<string>), split('', '.') (type: array<string>), split(50401020, 0) (type: array<string>) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 1276000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java index b891e27..cb1306e 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.common.type; import java.math.BigDecimal; -import java.sql.Timestamp; -import java.util.Date; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -27,7 +25,6 @@ import java.util.regex.Pattern; import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.hive.common.util.IntervalDayTimeUtils; -import sun.util.calendar.BaseCalendar; /** * Day-time interval type representing an offset in days/hours/minutes/seconds, @@ -170,6 +167,7 @@ public class HiveIntervalDayTime implements Comparable<HiveIntervalDayTime> { /** * Return a copy of this object. */ + @Override public Object clone() { return new HiveIntervalDayTime(totalSeconds, nanos); } http://git-wip-us.apache.org/repos/asf/hive/blob/f6312418/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java b/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java index df952cb..68ea6db 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java @@ -282,7 +282,7 @@ public enum JavaDataModel { return ((size + 8) >> 3) << 3; } - private long lengthForPrimitiveArrayOfSize(int primitiveSize, long length) { + public long lengthForPrimitiveArrayOfSize(int primitiveSize, long length) { return alignUp(array() + primitiveSize*length, memoryAlign()); }