This is an automated email from the ASF dual-hosted git repository. pvary pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 4b8ca41 HIVE-20079: Populate more accurate rawDataSize for parquet format (Aihua Xu reviewed by Antal Sinkovits, BELUGA BEHR and Peter Vary) 4b8ca41 is described below commit 4b8ca415888880818bb86000d714e5c271e5c5fe Author: Aihua Xu <aihu...@apache.org> AuthorDate: Tue Feb 26 13:35:11 2019 +0100 HIVE-20079: Populate more accurate rawDataSize for parquet format (Aihua Xu reviewed by Antal Sinkovits, BELUGA BEHR and Peter Vary) --- .../hive/ql/io/parquet/serde/ParquetHiveSerDe.java | 41 +++-------- .../parquet/write/ParquetRecordWriterWrapper.java | 38 ++++++++-- .../hive/ql/io/parquet/TestParquetSerDe.java | 9 +-- ql/src/test/queries/clientpositive/parquet_stats.q | 12 ++++ .../llap/vector_partitioned_date_time.q.out | 8 +-- .../results/clientpositive/parquet_analyze.q.out | 2 +- .../results/clientpositive/parquet_stats.q.out | 63 +++++++++++++++++ .../clientpositive/spark/parquet_join.q.out | 52 +++++++------- .../spark/parquet_vectorization_decimal_date.q.out | 10 +-- .../spark/parquet_vectorization_part_project.q.out | 12 ++-- .../spark/spark_dynamic_partition_pruning.q.out | 6 +- .../vectorization_input_format_excludes.q.out | 80 +++++++++++----------- .../spark/vectorization_parquet_projection.q.out | 38 +++++----- .../vectorization_parquet_projection.q.out | 8 +-- 14 files changed, 230 insertions(+), 149 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java index e0018a5..5d98b69 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -69,30 +69,15 @@ public class ParquetHiveSerDe extends AbstractSerDe { } } - private SerDeStats stats; private ObjectInspector objInspector; - - private enum LAST_OPERATION { - SERIALIZE, - DESERIALIZE, - UNKNOWN - } - - private LAST_OPERATION status; - private long serializedSize; - private long deserializedSize; - private ParquetHiveRecord parquetRow; public ParquetHiveSerDe() { parquetRow = new ParquetHiveRecord(); - stats = new SerDeStats(); } @Override public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { - - final TypeInfo rowTypeInfo; final List<String> columnNames; final List<TypeInfo> columnTypes; // Get column names and sort order @@ -128,19 +113,11 @@ public class ParquetHiveSerDe extends AbstractSerDe { } } this.objInspector = new ArrayWritableObjectInspector(completeTypeInfo, prunedTypeInfo); - - // Stats part - serializedSize = 0; - deserializedSize = 0; - status = LAST_OPERATION.UNKNOWN; } @Override public Object deserialize(final Writable blob) throws SerDeException { - status = LAST_OPERATION.DESERIALIZE; - deserializedSize = 0; if (blob instanceof ArrayWritable) { - deserializedSize = ((ArrayWritable) blob).get().length; return blob; } else { return null; @@ -163,23 +140,21 @@ public class ParquetHiveSerDe extends AbstractSerDe { if (!objInspector.getCategory().equals(Category.STRUCT)) { throw new SerDeException("Cannot serialize " + objInspector.getCategory() + ". Can only serialize a struct"); } - serializedSize = ((StructObjectInspector)objInspector).getAllStructFieldRefs().size(); - status = LAST_OPERATION.SERIALIZE; + parquetRow.value = obj; parquetRow.inspector= (StructObjectInspector)objInspector; return parquetRow; } + /** + * Return null for Parquet format and stats is collected in ParquetRecordWriterWrapper when writer gets + * closed. + * + * @return null + */ @Override public SerDeStats getSerDeStats() { - // must be different - assert (status != LAST_OPERATION.UNKNOWN); - if (status == LAST_OPERATION.SERIALIZE) { - stats.setRawDataSize(serializedSize); - } else { - stats.setRawDataSize(deserializedSize); - } - return stats; + return null; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java index db8a332..5d4131a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java @@ -22,6 +22,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; @@ -32,21 +33,26 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; import org.apache.hadoop.util.Progressable; - +import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.ParquetOutputFormat; +import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.hadoop.util.ContextUtil; +import org.apache.parquet.hadoop.util.HadoopInputFile; public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, ParquetHiveRecord>, - org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter { + StatsProvidingRecordWriter, org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter { public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordWriterWrapper.class); private final org.apache.hadoop.mapreduce.RecordWriter<NullWritable, ParquetHiveRecord> realWriter; private final TaskAttemptContext taskContext; - + private final JobConf jobConf; + private final Path file; + private SerDeStats stats; public ParquetRecordWriterWrapper( final OutputFormat<Void, ParquetHiveRecord> realOutputFormat, final JobConf jobConf, @@ -66,8 +72,12 @@ public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, Pa LOG.info("creating real writer to write at " + name); + this.jobConf = jobConf; + this.file = new Path(name); + realWriter = - ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name)); + ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, this.file); + LOG.info("real writer: " + realWriter); } catch (final InterruptedException e) { @@ -128,6 +138,21 @@ public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, Pa } catch (final InterruptedException e) { throw new IOException(e); } + + // Collect file stats + try { + ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(this.file, this.jobConf)); + long totalSize = 0; + for (BlockMetaData block : reader.getFooter().getBlocks()) { + totalSize += block.getTotalByteSize(); + } + + stats = new SerDeStats(); + stats.setRowCount(reader.getRecordCount()); + stats.setRawDataSize(totalSize); + } catch(IOException e) { + // Ignore + } } @Override @@ -149,4 +174,9 @@ public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, Pa write(null, (ParquetHiveRecord) w); } + @Override + public SerDeStats getStats() { + return stats; + } + } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java index 06f27b5..8d32f43 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java @@ -115,7 +115,8 @@ public class TestParquetSerDe extends TestCase { assertEquals(wb[0], boi.getStructFieldData(awb, b)); } - private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException { + private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) + throws SerDeException { // Get the row structure final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector(); @@ -123,13 +124,13 @@ public class TestParquetSerDe extends TestCase { // Deserialize final Object row = serDe.deserialize(t); assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class); - assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length); assertEquals("deserialization gives the wrong object", t, row); // Serialize final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi); - assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), ((ArrayWritable)serializedArr.getObject()).get().length); - assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, (ArrayWritable)serializedArr.getObject())); + assertTrue("serialized object should be equal to starting object", + arrayWritableEquals(t, (ArrayWritable)serializedArr.getObject())); + assertEquals("Stats are not collected during serialization and deserialization", null, serDe.getSerDeStats()); } private Properties createProperties() { diff --git a/ql/src/test/queries/clientpositive/parquet_stats.q b/ql/src/test/queries/clientpositive/parquet_stats.q new file mode 100644 index 0000000..92eaadb --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_stats.q @@ -0,0 +1,12 @@ + +DROP TABLE if exists parquet_stats; + +CREATE TABLE parquet_stats ( + id int, + str string +) STORED AS PARQUET; + +SET hive.stats.autogather=true; +INSERT INTO parquet_stats values(0, 'this is string 0'), (1, 'string 1'); +DESC FORMATTED parquet_stats; + diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 09435b3..6b76e26 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -4059,7 +4059,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_date - Statistics: Num rows: 137 Data size: 8357 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 13861 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -4069,7 +4069,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - Statistics: Num rows: 137 Data size: 8357 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 13861 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -5098,7 +5098,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 6165 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 11189 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -5108,7 +5108,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - Statistics: Num rows: 137 Data size: 6165 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 11189 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_analyze.q.out b/ql/src/test/results/clientpositive/parquet_analyze.q.out index 16c836d..f2088fd 100644 --- a/ql/src/test/results/clientpositive/parquet_analyze.q.out +++ b/ql/src/test/results/clientpositive/parquet_analyze.q.out @@ -93,7 +93,7 @@ Table Parameters: bucketing_version 2 numFiles 1 numRows 100 - rawDataSize 700 + rawDataSize 5936 totalSize 6730 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/parquet_stats.q.out b/ql/src/test/results/clientpositive/parquet_stats.q.out new file mode 100644 index 0000000..007b9a7 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_stats.q.out @@ -0,0 +1,63 @@ +PREHOOK: query: DROP TABLE if exists parquet_stats +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE if exists parquet_stats +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_stats ( + id int, + str string +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_stats +POSTHOOK: query: CREATE TABLE parquet_stats ( + id int, + str string +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_stats +PREHOOK: query: INSERT INTO parquet_stats values(0, 'this is string 0'), (1, 'string 1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@parquet_stats +POSTHOOK: query: INSERT INTO parquet_stats values(0, 'this is string 0'), (1, 'string 1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@parquet_stats +POSTHOOK: Lineage: parquet_stats.id SCRIPT [] +POSTHOOK: Lineage: parquet_stats.str SCRIPT [] +PREHOOK: query: DESC FORMATTED parquet_stats +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@parquet_stats +POSTHOOK: query: DESC FORMATTED parquet_stats +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@parquet_stats +# col_name data_type comment +id int +str string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"str\":\"true\"}} + bucketing_version 2 + numFiles 1 + numRows 2 + rawDataSize 146 + totalSize 469 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 diff --git a/ql/src/test/results/clientpositive/spark/parquet_join.q.out b/ql/src/test/results/clientpositive/spark/parquet_join.q.out index 0a092d2..b1b1d6c 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_join.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_join.q.out @@ -87,37 +87,37 @@ STAGE PLANS: TableScan alias: p1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: p2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), myvalue (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -128,14 +128,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -184,14 +184,14 @@ STAGE PLANS: TableScan alias: p1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) @@ -208,14 +208,14 @@ STAGE PLANS: TableScan alias: p2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), myvalue (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -225,14 +225,14 @@ STAGE PLANS: outputColumnNames: _col2 input vertices: 0 Map 1 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -319,14 +319,14 @@ STAGE PLANS: TableScan alias: p2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -334,14 +334,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out index e298dab..c0bfa4d 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out @@ -38,7 +38,7 @@ STAGE PLANS: TableScan alias: date_decimal_test_parquet filterExpr: (cint is not null and cdouble is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -47,7 +47,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:double)) predicate: (cdouble is not null and cint is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date), cdecimal (type: decimal(20,10)) outputColumnNames: _col0, _col1 @@ -55,19 +55,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3] - Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out index 88b97d5..ab1de3c 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out @@ -77,15 +77,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet_part_n0 - Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble + 2.0D) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -109,13 +109,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 490 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 490 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out index 5f06e4e..d3c55a3 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out @@ -6988,11 +6988,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_parquet - Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), UDFToDouble(hr) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -7001,7 +7001,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: double) input vertices: 1 Map 3 - Statistics: Num rows: 2200 Data size: 4400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2200 Data size: 25709 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out index a6e99f8..e37cd5d 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out @@ -78,20 +78,20 @@ STAGE PLANS: TableScan alias: alltypes_parquet_n0 filterExpr: (cint = 528534767) (type: boolean) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint = 528534767) (type: boolean) - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -178,22 +178,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_parquet_n0 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -219,14 +219,14 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -420,20 +420,20 @@ STAGE PLANS: TableScan alias: alltypes_parquet_n0 filterExpr: (cint = 528534767) (type: boolean) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint = 528534767) (type: boolean) - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -514,22 +514,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_parquet_n0 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Map Vectorization: enabled: false @@ -549,14 +549,14 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -750,20 +750,20 @@ STAGE PLANS: TableScan alias: alltypes_parquet_n0 filterExpr: (cint = 528534767) (type: boolean) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint = 528534767) (type: boolean) - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -850,22 +850,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_parquet_n0 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -891,14 +891,14 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1565,17 +1565,17 @@ STAGE PLANS: TableScan alias: parquettbl filterExpr: (UDFToInteger((t1 + t2)) > 10) (type: boolean) - Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 126 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToInteger((t1 + t2)) > 10) (type: boolean) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t1 (type: tinyint), t2 (type: tinyint), (t1 + t2) (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out b/ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out index 44667a5..87ab29b 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out @@ -139,14 +139,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), m1 (type: map<string,string>) outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -221,9 +221,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -309,22 +309,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -350,10 +350,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -407,22 +407,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: m1['color'] (type: string) outputColumnNames: _col0 - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Map Vectorization: enabled: true @@ -444,10 +444,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -524,9 +524,9 @@ STAGE PLANS: TableScan alias: parquet_nullsplit filterExpr: (len = '1') (type: boolean) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out b/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out index b52858d..e141ab7 100644 --- a/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out +++ b/ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out @@ -213,9 +213,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -489,9 +489,9 @@ STAGE PLANS: TableScan alias: parquet_nullsplit filterExpr: (len = '1') (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash