http://git-wip-us.apache.org/repos/asf/hive/blob/029e48b7/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
new file mode 100644
index 0000000..b445dfb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
@@ -0,0 +1,181 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesparquet
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesparquet
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesparquet
+            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11:boolean), FilterStringColLikeStringScalar(col 6:string, pattern %b%)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), SelectColumnIsNotNull(col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern a)))
+              predicate: (((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a')) or (cboolean2 is not null and (cstring1 like '%b%'))) (type: boolean)
+              Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
+                outputColumnNames: ctinyint, csmallint, cint
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2]
+                Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFMaxLong(col 1:smallint) -> smallint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 1:smallint) -> smallint, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumnNums: [0, 1, 2, 3, 4]
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                    Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 2, 5, 6, 7, 9, 11]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [double]
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), max(VALUE._col4)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: smallint), (UDFToInteger(_col0) * -75) (type: int), _col1 (type: bigint), (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1)) (type: double), (6981 * UDFToInteger(_col0)) (type: int), _col2 (type: smallint), (- _col2) (type: smallint), (197.0 % (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1))) (type: double), _col3 (type: bigint), _col4 (type: tinyint), (- _col4) (type: tinyint), ((- _col4) + _col4) (type: tinyint)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+            Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesparquet
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesparquet
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+16343	-1225725	1070	-1145.53738317757	114090483	-16307	16307	197.0	-26853917571	11	-11	0
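For anyone replaying this plan by hand, a minimal sketch of a session that exercises the same code path. It uses only properties, identifiers, and syntax that appear in the plan above, with the WHERE clause trimmed to one branch of the original predicate; exact defaults and conditions depend on the Hive version and execution engine:

    -- Map-side vectorization; the plan reports
    -- "enabledConditionsMet: [hive.vectorized.execution.enabled IS true]".
    SET hive.vectorized.execution.enabled=true;
    -- Reduce-side vectorization is gated separately; on the MR engine it
    -- stays off, as the "Reduce Vectorization: enabled: false" block shows.
    SET hive.vectorized.execution.reduce.enabled=true;
    -- Print per-operator vectorization decisions for a simplified query.
    EXPLAIN VECTORIZATION DETAIL
    SELECT MAX(csmallint), COUNT(*), MIN(csmallint), SUM(cint), MAX(ctinyint)
    FROM alltypesparquet
    WHERE cboolean2 IS NOT NULL AND cstring1 LIKE '%b%';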
