This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 3ffc2b403c4 HIVE-26506: HiveIcebergVectorizedRecordReader doesn't set the selected array (#3558) (Adam Szita, reviewed by Laszlo Pinter) 3ffc2b403c4 is described below commit 3ffc2b403c4193ef7d9b7b9deda834eb60ea5ceb Author: Adam Szita <40628386+sz...@users.noreply.github.com> AuthorDate: Wed Aug 31 10:59:09 2022 +0200 HIVE-26506: HiveIcebergVectorizedRecordReader doesn't set the selected array (#3558) (Adam Szita, reviewed by Laszlo Pinter) --- .../vector/HiveIcebergVectorizedRecordReader.java | 4 + .../test/queries/positive/llap_iceberg_read_orc.q | 28 ++++ .../positive/llap/llap_iceberg_read_orc.q.out | 154 +++++++++++++++++++++ 3 files changed, 186 insertions(+) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java index ddabc27932f..412f7478090 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java @@ -49,7 +49,11 @@ public final class HiveIcebergVectorizedRecordReader extends AbstractMapredIcebe VectorizedRowBatch newBatch = (VectorizedRowBatch) innerReader.getCurrentValue(); value.cols = newBatch.cols; value.endOfFile = newBatch.endOfFile; + value.numCols = newBatch.numCols; + value.projectedColumns = newBatch.projectedColumns; + value.projectionSize = newBatch.projectionSize; value.selectedInUse = newBatch.selectedInUse; + value.selected = newBatch.selected; value.size = newBatch.size; return true; } else { diff --git a/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q b/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q index 2d62dd844ba..a450bb68d80 100644 --- 
a/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q @@ -41,6 +41,34 @@ INSERT INTO llap_orders VALUES (19, 54, 6, timestamp('2015-08-15 01:59:22.177'), 'EU', 'HU'), (20, 10, 0, timestamp('2018-05-06 12:56:12.789'), 'US', 'CA'); +--verify row level filtering works with Iceberg ORC too +set hive.auto.convert.join=true; +set hive.disable.unsafe.external.table.operations=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true; + +explain select sum(quantity) + from llap_orders o, llap_items i + where + o.itemid = i.itemid and i.price != 83000 and + ( + (o.quantity > 0 and o.quantity < 39) + or + (o.quantity > 39 and o.quantity < 69) + or + (o.quantity > 70 ) + ); +select sum(quantity) +from llap_orders o, llap_items i +where + o.itemid = i.itemid and i.price != 83000 and + ( + (o.quantity > 0 and o.quantity < 39) + or + (o.quantity > 39 and o.quantity < 69) + or + (o.quantity > 70 ) + ); + --select query without any schema change yet SELECT i.name, i.description, SUM(o.quantity) FROM llap_items i JOIN llap_orders o ON i.itemid = o.itemid WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description; diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out index 625c180f34d..440ee72dd0c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out @@ -98,6 +98,160 @@ POSTHOOK: query: INSERT INTO llap_orders VALUES POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@llap_orders +PREHOOK: query: explain select sum(quantity) + from llap_orders o, llap_items i + where + o.itemid = i.itemid and i.price != 83000 and + ( + (o.quantity > 0 and o.quantity < 39) + 
or + (o.quantity > 39 and o.quantity < 69) + or + (o.quantity > 70 ) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@llap_items +PREHOOK: Input: default@llap_orders +#### A masked pattern was here #### +POSTHOOK: query: explain select sum(quantity) + from llap_orders o, llap_items i + where + o.itemid = i.itemid and i.price != 83000 and + ( + (o.quantity > 0 and o.quantity < 39) + or + (o.quantity > 39 and o.quantity < 69) + or + (o.quantity > 70 ) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@llap_items +POSTHOOK: Input: default@llap_orders +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: o + filterExpr: ((quantity NOT BETWEEN 39 AND 0 or quantity NOT BETWEEN 69 AND 39 or (quantity > 70)) and (((quantity > 0) and (quantity < 39)) or ((quantity > 39) and (quantity < 69)) or (quantity > 70)) and itemid is not null) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_29_container, bigKeyColName:itemid, smallTablePos:1, keyRatio:0.9523809523809523 + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((quantity NOT BETWEEN 39 AND 0 or quantity NOT BETWEEN 69 AND 39 or (quantity > 70)) and (((quantity > 0) and (quantity < 39)) or ((quantity > 39) and (quantity < 69)) or (quantity > 70)) and itemid is not null) (type: boolean) + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: quantity (type: int), itemid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 
+ keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.95238096 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map 3 + Map Operator Tree: + TableScan + alias: i + filterExpr: ((price <> 83000) and itemid is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((price <> 83000) and itemid is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: itemid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(quantity) +from llap_orders o, llap_items i +where + o.itemid = i.itemid and i.price != 83000 and + ( + (o.quantity > 0 and o.quantity < 39) + or + (o.quantity > 39 and o.quantity < 69) + or + (o.quantity > 70 ) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@llap_items +PREHOOK: Input: default@llap_orders +#### A masked pattern was here #### +POSTHOOK: query: select sum(quantity) +from llap_orders o, llap_items i +where + o.itemid = i.itemid and i.price != 83000 and + ( + (o.quantity > 0 and o.quantity < 39) + or + (o.quantity > 39 and o.quantity < 69) + or + (o.quantity > 70 ) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@llap_items +POSTHOOK: Input: default@llap_orders +#### A masked pattern was here #### +774 PREHOOK: query: SELECT i.name, i.description, SUM(o.quantity) FROM llap_items i JOIN llap_orders o ON i.itemid = o.itemid WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description PREHOOK: type: QUERY PREHOOK: Input: default@llap_items