Repository: hive
Updated Branches:
  refs/heads/master b054174bb -> ddf765ecb
HIVE-20684 : Make compute stats work for Druid tables (Nishant Bangarwa via Ashutosh Chauhan)

test plan changes due to removal of virtual columns

Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ddf765ec
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ddf765ec
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ddf765ec

Branch: refs/heads/master
Commit: ddf765ecbf44888dee2ed95dc8474b4cca84fcd5
Parents: b054174
Author: Nishant <nishant.mon...@gmail.com>
Authored: Thu Oct 4 00:37:53 2018 +0530
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Tue Oct 9 19:32:02 2018 -0700

----------------------------------------------------------------------
 .../hive/druid/DruidStorageHandlerUtils.java    |   3 +-
 .../druid/io/DruidQueryBasedInputFormat.java    |  10 +-
 .../apache/hadoop/hive/ql/exec/Utilities.java   |  14 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  14 +-
 .../ql/udf/generic/GenericUDAFComputeStats.java |   1 +
 .../clientpositive/druidmini_test_insert.q      |   2 +
 .../druid/druidmini_expressions.q.out           | 156 ++++++++-----------
 .../clientpositive/druid/druidmini_mv.q.out     |   8 +-
 .../druid/druidmini_test_insert.q.out           |  10 ++
 .../llap/external_jdbc_table2.q.out             |  18 +--
 .../results/clientpositive/llap/sysdb.q.out     |   2 +-
 .../PrimitiveObjectInspectorUtils.java          |   4 +
 12 files changed, 122 insertions(+), 120 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
index b9eb367..e67de89 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
@@ -672,13 +672,14 @@ public final class DruidStorageHandlerUtils {
     );
   }
 
-  public static String createScanAllQuery(String dataSourceName) throws JsonProcessingException {
+  public static String createScanAllQuery(String dataSourceName, List<String> columns) throws JsonProcessingException {
     final ScanQuery.ScanQueryBuilder scanQueryBuilder = ScanQuery.newScanQueryBuilder();
     final List<Interval> intervals = Arrays.asList(DEFAULT_INTERVAL);
     ScanQuery scanQuery = scanQueryBuilder
         .dataSource(dataSourceName)
         .resultFormat(ScanQuery.RESULT_FORMAT_COMPACTED_LIST)
         .intervals(new MultipleIntervalSegmentSpec(intervals))
+        .columns(columns)
         .build();
     return JSON_MAPPER.writeValueAsString(scanQuery);
   }
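The scan-all query now carries an explicit column projection. A minimal usage sketch (the data source name and column list below are illustrative; in the real code path the columns come from the job configuration via Utilities.getColumnNames(conf)):

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils;

// Hypothetical driver class; only createScanAllQuery is from the patch.
public class ScanAllQueryExample {
  public static void main(String[] args) throws Exception {
    // Restricting the scan to the requested columns keeps the broker from
    // returning every column of the segment.
    List<String> columns = Arrays.asList("__time", "cstring1", "cdouble");
    String json = DruidStorageHandlerUtils.createScanAllQuery("default.druid_alltypesorc", columns);
    System.out.println(json); // scan-query JSON over the default interval
  }
}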
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
index f5009a2..9266fae 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader;
 import org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader;
 import org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader;
 import org.apache.hadoop.hive.druid.serde.DruidWritable;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.JobConf;
@@ -114,6 +115,7 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
       throw new IOException("Druid broker address not specified in configuration");
     }
     String druidQuery = StringEscapeUtils.unescapeJava(conf.get(Constants.DRUID_QUERY_JSON));
+    String druidQueryType;
     if (StringUtils.isEmpty(druidQuery)) {
       // Empty, maybe because CBO did not run; we fall back to
@@ -125,9 +127,9 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
       if (dataSource == null || dataSource.isEmpty()) {
         throw new IOException("Druid data source cannot be empty or null");
       }
-
-      druidQuery = DruidStorageHandlerUtils.createScanAllQuery(dataSource);
+      druidQuery = DruidStorageHandlerUtils.createScanAllQuery(dataSource, Utilities.getColumnNames(conf));
       druidQueryType = Query.SCAN;
+      conf.set(Constants.DRUID_QUERY_TYPE, druidQueryType);
     } else {
       druidQueryType = conf.get(Constants.DRUID_QUERY_TYPE);
       if (druidQueryType == null) {
@@ -286,7 +288,7 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
     final DruidQueryRecordReader<?, ?> reader;
     final String druidQueryType = job.get(Constants.DRUID_QUERY_TYPE);
     if (druidQueryType == null) {
-      reader = new DruidSelectQueryRecordReader(); // By default
+      reader = new DruidScanQueryRecordReader(); // By default we use scan query as fallback.
       reader.initialize((HiveDruidSplit) split, job);
       return reader;
     }
@@ -307,7 +309,7 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
     // The reason is that Druid results format is different for each type.
     final String druidQueryType = context.getConfiguration().get(Constants.DRUID_QUERY_TYPE);
     if (druidQueryType == null) {
-      return new DruidSelectQueryRecordReader(); // By default
+      return new DruidScanQueryRecordReader(); // By default, we use druid scan query as fallback.
     }
     final DruidQueryRecordReader<?, ?> reader = getDruidQueryReader(druidQueryType);
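Both record-reader entry points now share the same default. A small sketch of the changed fallback (hypothetical helper; in the patch the logic is inline and non-null query types are dispatched through getDruidQueryReader):

import org.apache.hadoop.hive.druid.serde.DruidQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidScanQueryRecordReader;

public class DruidReaderFallback {
  // Before HIVE-20684 this returned new DruidSelectQueryRecordReader();
  // select queries are deprecated in Druid, so scan is the safer default.
  static DruidQueryRecordReader<?, ?> fallbackReader() {
    return new DruidScanQueryRecordReader();
  }
}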
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 10aa94e..e3e329f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2133,9 +2133,19 @@ public final class Utilities {
   public static List<String> getColumnNames(Properties props) {
     List<String> names = new ArrayList<String>();
     String colNames = props.getProperty(serdeConstants.LIST_COLUMNS);
+    return splitColNames(names, colNames);
+  }
+
+  public static List<String> getColumnNames(Configuration conf) {
+    List<String> names = new ArrayList<String>();
+    String colNames = conf.get(serdeConstants.LIST_COLUMNS);
+    return splitColNames(names, colNames);
+  }
+
+  private static List<String> splitColNames(List<String> names, String colNames) {
     String[] cols = colNames.trim().split(",");
-    for (String col : cols) {
-      if (StringUtils.isNotBlank(col)) {
+    for(String col : cols) {
+      if(StringUtils.isNotBlank(col)) {
         names.add(col);
       }
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index e81ed50..31bc38e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -11126,11 +11126,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
       // use a list for easy cumtomize
       List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
-      while (vcs.hasNext()) {
-        VirtualColumn vc = vcs.next();
-        rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(),
-            vc.getTypeInfo(), alias, true, vc.getIsHidden()));
-        vcList.add(vc);
+      if(!tab.isNonNative()) {
+        // Virtual columns are only for native tables
+        while (vcs.hasNext()) {
+          VirtualColumn vc = vcs.next();
+          rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(),
+              vc.getTypeInfo(), alias, true, vc.getIsHidden()
+          ));
+          vcList.add(vc);
+        }
       }
 
       // Create the root of the operator tree

http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index dd365dd..642f42b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -84,6 +84,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
     case INT:
     case LONG:
     case TIMESTAMP:
+    case TIMESTAMPLOCALTZ:
       return new GenericUDAFLongStatsEvaluator();
     case FLOAT:
     case DOUBLE:
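Returning to the Utilities change above, a minimal sketch of the new Configuration-based overload (the column names are illustrative; serdeConstants.LIST_COLUMNS is the same property the existing Properties overload reads):

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.serde.serdeConstants;

public class ColumnNamesExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set(serdeConstants.LIST_COLUMNS, "__time,cstring1,cdouble");
    // Both overloads funnel into splitColNames: blank entries are dropped
    // and the remaining names are returned in declaration order.
    List<String> names = Utilities.getColumnNames(conf);
    System.out.println(names); // [__time, cstring1, cdouble]
  }
}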
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/test/queries/clientpositive/druidmini_test_insert.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/druidmini_test_insert.q b/ql/src/test/queries/clientpositive/druidmini_test_insert.q
index 09e4a19..e97fe5b 100644
--- a/ql/src/test/queries/clientpositive/druidmini_test_insert.q
+++ b/ql/src/test/queries/clientpositive/druidmini_test_insert.q
@@ -34,6 +34,8 @@ SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
   cboolean2
   FROM alltypesorc where ctimestamp1 IS NOT NULL;
 
+-- ANALYZE COLUMN STATS FOR DRUID TABLE
+analyze table druid_alltypesorc compute statistics for columns;
 
 SELECT COUNT(*) FROM druid_alltypesorc;
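The new test exercises column statistics on a Druid-backed table end to end. The same statement can be issued from any Hive client; a minimal JDBC sketch (the HiveServer2 URL is a placeholder):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class DruidColumnStatsExample {
  public static void main(String[] args) throws Exception {
    // Placeholder endpoint; the ANALYZE statement is the one the q file adds.
    try (Connection conn =
             DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement()) {
      stmt.execute("analyze table druid_alltypesorc compute statistics for columns");
    }
  }
}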
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
index 26ef4d2..59285e8 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
@@ -393,22 +393,18 @@ STAGE PLANS:
                     druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
                     druid.query.type groupBy
                   Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: extract (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), $f3 (type: double)
-                    outputColumnNames: extract, cstring1, cstring2, $f3
+                  Group By Operator
+                    aggregations: count(cstring2), sum($f3)
+                    keys: extract (type: timestamp with local time zone), cstring1 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(cstring2), sum($f3)
-                      keys: extract (type: timestamp with local time zone), cstring1 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                    Reduce Output Operator
+                      key expressions: _col0 (type: timestamp with local time zone), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: timestamp with local time zone), _col1 (type: string)
                       Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: timestamp with local time zone), _col1 (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: timestamp with local time zone), _col1 (type: string)
-                        Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col2 (type: bigint), _col3 (type: double)
+                      value expressions: _col2 (type: bigint), _col3 (type: double)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -468,22 +464,18 @@ STAGE PLANS:
                     druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cdouble","outputName":"cdouble","outputType":"DOUBLE"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
                     druid.query.type groupBy
                   Statistics: Num rows: 9173 Data size: 2091840 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: extract (type: timestamp with local time zone), cstring1 (type: string), cdouble (type: double), $f3 (type: double)
-                    outputColumnNames: extract, cstring1, cdouble, $f3
+                  Group By Operator
+                    aggregations: count(cdouble), sum($f3)
+                    keys: extract (type: timestamp with local time zone), cstring1 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 9173 Data size: 2091840 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(cdouble), sum($f3)
-                      keys: extract (type: timestamp with local time zone), cstring1 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                    Reduce Output Operator
+                      key expressions: _col0 (type: timestamp with local time zone), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: timestamp with local time zone), _col1 (type: string)
                       Statistics: Num rows: 9173 Data size: 2091840 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: timestamp with local time zone), _col1 (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: timestamp with local time zone), _col1 (type: string)
-                        Statistics: Num rows: 9173 Data size: 2091840 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col2 (type: bigint), _col3 (type: double)
+                      value expressions: _col2 (type: bigint), _col3 (type: double)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -543,22 +535,18 @@ STAGE PLANS:
                     druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","expression":"(CAST(2, 'DOUBLE') * \"cdouble\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
                     druid.query.type groupBy
                   Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: extract (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), $f3 (type: double)
-                    outputColumnNames: extract, cstring1, cstring2, $f3
+                  Group By Operator
+                    aggregations: count(cstring2), sum($f3)
+                    keys: extract (type: timestamp with local time zone), cstring1 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(cstring2), sum($f3)
-                      keys: extract (type: timestamp with local time zone), cstring1 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                    Reduce Output Operator
+                      key expressions: _col0 (type: timestamp with local time zone), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: timestamp with local time zone), _col1 (type: string)
                       Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: timestamp with local time zone), _col1 (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: timestamp with local time zone), _col1 (type: string)
-                        Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col2 (type: bigint), _col3 (type: double)
+                      value expressions: _col2 (type: bigint), _col3 (type: double)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -618,22 +606,18 @@ STAGE PLANS:
                     druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cstring1","outputName":"cstring1","outputType":"STRING"},{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"concat(concat(\"cstring2\",'_'),\"cstring1\")","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f3","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
                     druid.query.type groupBy
                   Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: extract (type: timestamp with local time zone), cstring1 (type: string), vc (type: string), $f3 (type: double)
-                    outputColumnNames: extract, cstring1, vc, $f3
+                  Group By Operator
+                    aggregations: count(vc), sum($f3)
+                    keys: extract (type: timestamp with local time zone), cstring1 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(vc), sum($f3)
-                      keys: extract (type: timestamp with local time zone), cstring1 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                    Reduce Output Operator
+                      key expressions: _col0 (type: timestamp with local time zone), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: timestamp with local time zone), _col1 (type: string)
                       Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: timestamp with local time zone), _col1 (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: timestamp with local time zone), _col1 (type: string)
-                        Statistics: Num rows: 9173 Data size: 3625856 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col2 (type: bigint), _col3 (type: double)
+                      value expressions: _col2 (type: bigint), _col3 (type: double)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -693,19 +677,15 @@ STAGE PLANS:
                     druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
                     druid.query.type groupBy
                   Statistics: Num rows: 9173 Data size: 1603744 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring2 (type: string)
-                    outputColumnNames: cstring2
-                    Statistics: Num rows: 9173 Data size: 1603744 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(cstring2)
-                      mode: hash
-                      outputColumnNames: _col0
+                  Group By Operator
+                    aggregations: count(cstring2)
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
                       Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                      value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -760,19 +740,15 @@ STAGE PLANS:
                     druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"cstring2","outputName":"cstring2","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
                     druid.query.type groupBy
                   Statistics: Num rows: 9173 Data size: 1673472 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cstring2 (type: string), $f1 (type: double)
-                    outputColumnNames: cstring2, $f1
-                    Statistics: Num rows: 9173 Data size: 1673472 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(cstring2), sum($f1)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
+                  Group By Operator
+                    aggregations: count(cstring2), sum($f1)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
                       Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint), _col1 (type: double)
+                      value expressions: _col0 (type: bigint), _col1 (type: double)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -827,19 +803,15 @@ STAGE PLANS:
                     druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"concat(concat(\"cstring2\",'_'),\"cstring1\")","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cdouble"},{"type":"longMin","name":"$f2","fieldName":"cint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
                     druid.query.type groupBy
                   Statistics: Num rows: 9173 Data size: 1708336 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: vc (type: string), $f1 (type: double), $f2 (type: int)
-                    outputColumnNames: vc, $f1, $f2
-                    Statistics: Num rows: 9173 Data size: 1708336 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(vc), sum($f1), min($f2)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
+                  Group By Operator
+                    aggregations: count(vc), sum($f1), min($f2)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
                       Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: int)
+                      value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2

http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
index 0106ffa..8f9c4c0 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
@@ -273,10 +273,10 @@ STAGE PLANS:
                 keys:
                   0
                   1
-                outputColumnNames: _col0, _col1, _col6
+                outputColumnNames: _col0, _col1, _col3
                 Statistics: Num rows: 18 Data size: 522 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: double), _col0 (type: int), _col6 (type: double)
+                  expressions: _col0 (type: int), _col1 (type: double), _col0 (type: int), _col3 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 18 Data size: 522 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -636,10 +636,10 @@ STAGE PLANS:
                 keys:
                   0
                   1
-                outputColumnNames: _col0, _col1, _col6
+                outputColumnNames: _col0, _col1, _col3
                 Statistics: Num rows: 21 Data size: 609 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: double), _col0 (type: int), _col6 (type: double)
+                  expressions: _col0 (type: int), _col1 (type: double), _col0 (type: int), _col3 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 21 Data size: 609 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out
index a4a5594..e568b14 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out
@@ -90,6 +90,16 @@ SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@druid_alltypesorc
+PREHOOK: query: analyze table druid_alltypesorc compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: analyze table druid_alltypesorc compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
 PREHOOK: type: QUERY
 PREHOOK: Input: default@druid_alltypesorc
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out b/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
index d511498..9bfee47 100644
--- a/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
+++ b/ql/src/test/results/clientpositive/llap/external_jdbc_table2.q.out
@@ -332,19 +332,15 @@ WHERE "ikey" IS NOT NULL
                 keys:
                   0 ikey (type: int)
                   1 ikey (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9, _col10
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                 Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: float), _col3 (type: double), _col7 (type: int), _col8 (type: bigint), _col9 (type: float), _col10 (type: double)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                File Output Operator
+                  compressed: false
                   Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/ql/src/test/results/clientpositive/llap/sysdb.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/sysdb.q.out b/ql/src/test/results/clientpositive/llap/sysdb.q.out
index b3be3f7..d2202d0 100644
--- a/ql/src/test/results/clientpositive/llap/sysdb.q.out
+++ b/ql/src/test/results/clientpositive/llap/sysdb.q.out
@@ -3872,7 +3872,7 @@ STAGE PLANS:
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
           TableScan Vectorization:
               native: true
-              vectorizationSchemaColumns: [0:sd_id:bigint, 1:cd_id:bigint, 2:input_format:string, 3:is_compressed:boolean, 4:is_storedassubdirectories:boolean, 5:location:string, 6:num_buckets:int, 7:output_format:string, 8:serde_id:bigint, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+              vectorizationSchemaColumns: [0:sd_id:bigint, 1:cd_id:bigint, 2:input_format:string, 3:is_compressed:boolean, 4:is_storedassubdirectories:boolean, 5:location:string, 6:num_buckets:int, 7:output_format:string, 8:serde_id:bigint]
           Select Operator
             Select Vectorization:
                 className: VectorSelectOperator
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf765ec/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index 0dbecb7..5624315 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -805,6 +805,10 @@ public final class PrimitiveObjectInspectorUtils {
       result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)
           .getSeconds();
       break;
+    case TIMESTAMPLOCALTZ:
+      result = ((TimestampLocalTZObjectInspector) oi).getPrimitiveWritableObject(o)
+          .getSeconds();
+      break;
     case DECIMAL: {
       HiveDecimal dec = ((HiveDecimalObjectInspector) oi)
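The new TIMESTAMPLOCALTZ branch relies on the local-time-zone writable exposing epoch seconds the same way the plain timestamp writable does. A small sketch (the construction of the writable here is illustrative):

import java.time.ZonedDateTime;

import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;

public class LocalTzSecondsExample {
  public static void main(String[] args) {
    // Illustrative construction; the patch only depends on getSeconds()
    // returning the epoch second of the underlying instant.
    TimestampTZ tstz = new TimestampTZ(ZonedDateTime.parse("2018-10-04T00:37:53+05:30"));
    TimestampLocalTZWritable writable = new TimestampLocalTZWritable(tstz);
    System.out.println(writable.getSeconds()); // epoch seconds of the instant
  }
}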