[hive] branch master updated: HIVE-26506: HiveIcebergVectorizedRecordReader doesn't set the selected array (#3558) (Adam Szita, reviewed by Laszlo Pinter)

szita Wed, 31 Aug 2022 02:00:21 -0700

This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new 3ffc2b403c4 HIVE-26506: HiveIcebergVectorizedRecordReader doesn't set the selected array (#3558) (Adam Szita, reviewed by Laszlo Pinter)
3ffc2b403c4 is described below

commit 3ffc2b403c4193ef7d9b7b9deda834eb60ea5ceb
Author: Adam Szita <40628386+sz...@users.noreply.github.com>
AuthorDate: Wed Aug 31 10:59:09 2022 +0200

    HIVE-26506: HiveIcebergVectorizedRecordReader doesn't set the selected array (#3558) (Adam Szita, reviewed by Laszlo Pinter)
---
 .../vector/HiveIcebergVectorizedRecordReader.java  |   4 +
 .../test/queries/positive/llap_iceberg_read_orc.q  |  28 ++++
 .../positive/llap/llap_iceberg_read_orc.q.out      | 154 +++++++++++++++++++++
 3 files changed, 186 insertions(+)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java
index ddabc27932f..412f7478090 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java
@@ -49,7 +49,11 @@ public final class HiveIcebergVectorizedRecordReader extends AbstractMapredIcebe
         VectorizedRowBatch newBatch = (VectorizedRowBatch) innerReader.getCurrentValue();
         value.cols = newBatch.cols;
         value.endOfFile = newBatch.endOfFile;
+        value.numCols = newBatch.numCols;
+        value.projectedColumns = newBatch.projectedColumns;
+        value.projectionSize = newBatch.projectionSize;
         value.selectedInUse = newBatch.selectedInUse;
+        value.selected = newBatch.selected;
         value.size = newBatch.size;
         return true;
       } else {
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q b/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
index 2d62dd844ba..a450bb68d80 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
@@ -41,6 +41,34 @@ INSERT INTO llap_orders VALUES
 (19, 54, 6, timestamp('2015-08-15 01:59:22.177'), 'EU', 'HU'),
 (20, 10, 0, timestamp('2018-05-06 12:56:12.789'), 'US', 'CA');
 
+--verify row level filtering works with Iceberg ORC too
+set hive.auto.convert.join=true;
+set hive.disable.unsafe.external.table.operations=false;
+set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;
+
+explain select sum(quantity)
+    from llap_orders o, llap_items i
+    where
+        o.itemid = i.itemid and i.price != 83000 and
+        (
+            (o.quantity > 0 and o.quantity < 39)
+                or
+            (o.quantity > 39 and o.quantity < 69)
+                or
+            (o.quantity > 70 )
+        );
+select sum(quantity)
+from llap_orders o, llap_items i
+where
+    o.itemid = i.itemid and i.price != 83000 and
+    (
+        (o.quantity > 0 and o.quantity < 39)
+            or
+        (o.quantity > 39 and o.quantity < 69)
+            or
+        (o.quantity > 70 )
+    );
+
 --select query without any schema change yet
 SELECT i.name, i.description, SUM(o.quantity) FROM llap_items i JOIN llap_orders o ON i.itemid = o.itemid  WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description;
 
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
index 625c180f34d..440ee72dd0c 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
@@ -98,6 +98,160 @@ POSTHOOK: query: INSERT INTO llap_orders VALUES
 POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@llap_orders
+PREHOOK: query: explain select sum(quantity)
+    from llap_orders o, llap_items i
+    where
+        o.itemid = i.itemid and i.price != 83000 and
+        (
+            (o.quantity > 0 and o.quantity < 39)
+                or
+            (o.quantity > 39 and o.quantity < 69)
+                or
+            (o.quantity > 70 )
+        )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@llap_items
+PREHOOK: Input: default@llap_orders
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(quantity)
+    from llap_orders o, llap_items i
+    where
+        o.itemid = i.itemid and i.price != 83000 and
+        (
+            (o.quantity > 0 and o.quantity < 39)
+                or
+            (o.quantity > 39 and o.quantity < 69)
+                or
+            (o.quantity > 70 )
+        )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@llap_items
+POSTHOOK: Input: default@llap_orders
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: o
+                  filterExpr: ((quantity NOT BETWEEN 39 AND 0 or quantity NOT 
BETWEEN 69 AND 39 or (quantity > 70)) and (((quantity > 0) and (quantity < 39)) 
or ((quantity > 39) and (quantity < 69)) or (quantity > 70)) and itemid is not 
null) (type: boolean)
+                  probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_29_container, 
bigKeyColName:itemid, smallTablePos:1, keyRatio:0.9523809523809523
+                  Statistics: Num rows: 21 Data size: 168 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((quantity NOT BETWEEN 39 AND 0 or quantity NOT 
BETWEEN 69 AND 39 or (quantity > 70)) and (((quantity > 0) and (quantity < 39)) 
or ((quantity > 39) and (quantity < 69)) or (quantity > 70)) and itemid is not 
null) (type: boolean)
+                    Statistics: Num rows: 21 Data size: 168 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: quantity (type: int), itemid (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 21 Data size: 168 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col1 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0
+                        input vertices:
+                          1 Map 3
+                        Statistics: Num rows: 21 Data size: 84 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: sum(_col0)
+                          minReductionHashAggr: 0.95238096
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            null sort order: 
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: i
+                  filterExpr: ((price <> 83000) and itemid is not null) (type: 
boolean)
+                  Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((price <> 83000) and itemid is not null) 
(type: boolean)
+                    Statistics: Num rows: 7 Data size: 56 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: itemid (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 7 Data size: 28 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 7 Data size: 28 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(quantity)
+from llap_orders o, llap_items i
+where
+    o.itemid = i.itemid and i.price != 83000 and
+    (
+        (o.quantity > 0 and o.quantity < 39)
+            or
+        (o.quantity > 39 and o.quantity < 69)
+            or
+        (o.quantity > 70 )
+    )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@llap_items
+PREHOOK: Input: default@llap_orders
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(quantity)
+from llap_orders o, llap_items i
+where
+    o.itemid = i.itemid and i.price != 83000 and
+    (
+        (o.quantity > 0 and o.quantity < 39)
+            or
+        (o.quantity > 39 and o.quantity < 69)
+            or
+        (o.quantity > 70 )
+    )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@llap_items
+POSTHOOK: Input: default@llap_orders
+#### A masked pattern was here ####
+774
 PREHOOK: query: SELECT i.name, i.description, SUM(o.quantity) FROM llap_items i JOIN llap_orders o ON i.itemid = o.itemid  WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description
 PREHOOK: type: QUERY
 PREHOOK: Input: default@llap_items

[hive] branch master updated: HIVE-26506: HiveIcebergVectorizedRecordReader doesn't set the selected array (#3558) (Adam Szita, reviewed by Laszlo Pinter)

Reply via email to