This is an automated email from the ASF dual-hosted git repository. tarmstrong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push: new a154b2d IMPALA-5861: fix RowsRead for zero-slot table scan a154b2d is described below commit a154b2d6e775a508df4fd2c8d51a18d5c1d1f933 Author: Tim Armstrong <tarmstr...@cloudera.com> AuthorDate: Fri Feb 1 07:13:56 2019 -0800 IMPALA-5861: fix RowsRead for zero-slot table scan Testing: Added regression test based on JIRA and a targeted test for all HDFS file formats. Change-Id: I7a927c6a4f0b8055608cb7a5e2b550a1610cef89 Reviewed-on: http://gerrit.cloudera.org:8080/12332 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/exec/parquet/hdfs-parquet-scanner.cc | 2 +- .../queries/QueryTest/mixed-format.test | 14 +++ .../queries/QueryTest/scanners.test | 111 +++++++++++++++++++++ 3 files changed, 126 insertions(+), 1 deletion(-) diff --git a/be/src/exec/parquet/hdfs-parquet-scanner.cc b/be/src/exec/parquet/hdfs-parquet-scanner.cc index 4fe9914..3836d0b 100644 --- a/be/src/exec/parquet/hdfs-parquet-scanner.cc +++ b/be/src/exec/parquet/hdfs-parquet-scanner.cc @@ -400,7 +400,7 @@ Status HdfsParquetScanner::GetNextInternal(RowBatch* row_batch) { assemble_rows_timer_.Stop(); RETURN_IF_ERROR(status); row_group_rows_read_ += max_tuples; - COUNTER_ADD(scan_node_->rows_read_counter(), row_group_rows_read_); + COUNTER_ADD(scan_node_->rows_read_counter(), max_tuples); return Status::OK(); } diff --git a/testdata/workloads/functional-query/queries/QueryTest/mixed-format.test b/testdata/workloads/functional-query/queries/QueryTest/mixed-format.test index 0b693e1..2d5bf9e 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/mixed-format.test +++ b/testdata/workloads/functional-query/queries/QueryTest/mixed-format.test @@ -24,3 +24,17 @@ bigint, bigint ---- RESULTS 280,1260 ==== +---- QUERY +# IMPALA-5861: RowsRead counter should be accurate for table scan that returns +# zero slots. 
This test is run with various batch_size values, which helps +# reproduce the bug. Scanning multiple file formats triggers the bug because +# the Parquet count(*) rewrite is disabled when non-Parquet file formats are +# present. +select count(*) from functional.alltypesmixedformat +---- TYPES +bigint +---- RESULTS +1200 +---- RUNTIME_PROFILE +aggregation(SUM, RowsRead): 1200 +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/scanners.test b/testdata/workloads/functional-query/queries/QueryTest/scanners.test index b05786e..72d6505 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/scanners.test +++ b/testdata/workloads/functional-query/queries/QueryTest/scanners.test @@ -128,3 +128,114 @@ select count(*) from alltypessmall ---- TYPES BIGINT ==== +---- QUERY +# IMPALA-5861: RowsRead counter should be accurate for table scan that materializes +# zero slots from the files. This test is run with various batch_size values, +# which helps reproduce the Parquet bug. +select 1 from alltypessmall +---- TYPES +tinyint +---- RESULTS +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---- RUNTIME_PROFILE +aggregation(SUM, RowsRead): 100 +====