This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f71a50417b4 HIVE-28009: Shared work optimizer ignores schema merge 
setting in case of virtual column difference (Krisztian Kasa, reviewed by Denys 
Kuzmenko)
f71a50417b4 is described below

commit f71a50417b4f1271496d9e4a04aefd5cbc906f71
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Mon Jan 22 14:18:27 2024 +0100

    HIVE-28009: Shared work optimizer ignores schema merge setting in case of 
virtual column difference (Krisztian Kasa, reviewed by Denys Kuzmenko)
---
 ...merge_iceberg_copy_on_write_unpartitioned.q.out |  28 +-
 .../update_iceberg_copy_on_write_partitioned.q.out |  86 ++--
 ...pdate_iceberg_copy_on_write_unpartitioned.q.out |  88 ++---
 .../hive/ql/optimizer/SharedWorkOptimizer.java     |   5 +-
 .../sharedwork_virtualcol_schema_merge.q           |  35 ++
 .../llap/sharedwork_virtualcol_schema_merge.q.out  | 432 +++++++++++++++++++++
 6 files changed, 571 insertions(+), 103 deletions(-)

diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_unpartitioned.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_unpartitioned.q.out
index 6728fbf39bf..fb70b3e6af9 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_unpartitioned.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_unpartitioned.q.out
@@ -166,20 +166,6 @@ STAGE PLANS:
                       Map-reduce partition columns: _col4 (type: int)
                       Statistics: Num rows: 4 Data size: 832 Basic stats: 
COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: int), _col1 (type: 
bigint), _col2 (type: string), _col3 (type: bigint)
-                  Filter Operator
-                    predicate: (a is not null and FILE__PATH is not null) 
(type: boolean)
-                    Statistics: Num rows: 4 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: FILE__PATH (type: string), a (type: int)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 4 Data size: 752 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: int)
-                        null sort order: z
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: int)
-                        Statistics: Num rows: 4 Data size: 752 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: string)
                   Filter Operator
                     predicate: (FILE__PATH is not null and a is not null) 
(type: boolean)
                     Statistics: Num rows: 4 Data size: 380 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -194,6 +180,20 @@ STAGE PLANS:
                         Map-reduce partition columns: _col4 (type: int)
                         Statistics: Num rows: 4 Data size: 1196 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col1 (type: 
bigint), _col2 (type: string), _col3 (type: bigint), _col5 (type: string), 
_col6 (type: int)
+                  Filter Operator
+                    predicate: (a is not null and FILE__PATH is not null) 
(type: boolean)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: FILE__PATH (type: string), a (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 4 Data size: 752 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 752 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: string)
                   Filter Operator
                     predicate: a is not null (type: boolean)
                     Statistics: Num rows: 4 Data size: 380 Basic stats: 
COMPLETE Column stats: COMPLETE
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out
index 7a5b872fa11..724b0631f95 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out
@@ -1227,9 +1227,9 @@ STAGE PLANS:
         Reducer 3 <- Reducer 14 (XPROD_EDGE), Reducer 2 (XPROD_EDGE)
         Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 
5 (CONTAINS)
         Reducer 6 <- Union 5 (SIMPLE_EDGE)
-        Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
-        Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 5 (CONTAINS)
-        Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 5 
(CONTAINS)
+        Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 5 
(CONTAINS)
+        Reducer 8 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
+        Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1253,29 +1253,29 @@ STAGE PLANS:
                         value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 
(type: bigint)
                   Filter Operator
                     predicate: a is not null (type: boolean)
-                    Statistics: Num rows: 9 Data size: 873 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 9 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: a (type: int), b (type: string), c (type: 
int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), 
FILE__PATH (type: string)
+                      expressions: a (type: int), c (type: int), 
PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
-                      Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string)
+                        Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
                     Select Operator
-                      expressions: a (type: int), c (type: int), 
PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint)
+                      expressions: a (type: int), b (type: string), c (type: 
int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), 
FILE__PATH (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
-                      Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
+                        Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string)
                     Select Operator
                       expressions: a (type: int)
                       outputColumnNames: _col0
@@ -1546,6 +1546,27 @@ STAGE PLANS:
                       serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
                       name: default.tbl_ice
         Reducer 7 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: int), _col3 (type: bigint), _col4 
(type: string), _col5 (type: bigint), _col0 (type: int), 'Changed forever' 
(type: string), _col1 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
+                  Statistics: Num rows: 1 Data size: 311 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: iceberg_bucket(_col4, 16) (type: int), 
iceberg_truncate(_col5, 3) (type: string)
+                    null sort order: aa
+                    sort order: ++
+                    Map-reduce partition columns: iceberg_bucket(_col4, 16) 
(type: int), iceberg_truncate(_col5, 3) (type: string)
+                    Statistics: Num rows: 3 Data size: 913 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: int), _col1 (type: 
bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: int), _col5 
(type: string), _col6 (type: int)
+        Reducer 8 
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
@@ -1562,7 +1583,7 @@ STAGE PLANS:
                   Map-reduce partition columns: _col5 (type: string)
                   Statistics: Num rows: 1 Data size: 293 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: int), _col3 (type: int), _col4 (type: bigint)
-        Reducer 8 
+        Reducer 9 
             Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
@@ -1602,27 +1623,6 @@ STAGE PLANS:
                         Map-reduce partition columns: iceberg_bucket(_col4, 
16) (type: int), iceberg_truncate(_col5, 3) (type: string)
                         Statistics: Num rows: 3 Data size: 913 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col1 (type: 
bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: int), _col5 
(type: string), _col6 (type: int)
-        Reducer 9 
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Select Operator
-                  expressions: _col2 (type: int), _col3 (type: bigint), _col4 
(type: string), _col5 (type: bigint), _col0 (type: int), 'Changed forever' 
(type: string), _col1 (type: int)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
-                  Statistics: Num rows: 1 Data size: 311 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: iceberg_bucket(_col4, 16) (type: int), 
iceberg_truncate(_col5, 3) (type: string)
-                    null sort order: aa
-                    sort order: ++
-                    Map-reduce partition columns: iceberg_bucket(_col4, 16) 
(type: int), iceberg_truncate(_col5, 3) (type: string)
-                    Statistics: Num rows: 3 Data size: 913 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: int), _col1 (type: 
bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: int), _col5 
(type: string), _col6 (type: int)
         Union 5 
             Vertex: Union 5
 
@@ -1772,29 +1772,29 @@ STAGE PLANS:
                         value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 
(type: bigint)
                   Filter Operator
                     predicate: a is not null (type: boolean)
-                    Statistics: Num rows: 9 Data size: 873 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 9 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: a (type: int), b (type: string), c (type: 
int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), 
FILE__PATH (type: string)
+                      expressions: a (type: int), c (type: int), 
PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
-                      Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string)
+                        Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
                     Select Operator
-                      expressions: a (type: int), c (type: int), 
PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint)
+                      expressions: a (type: int), b (type: string), c (type: 
int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), 
FILE__PATH (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
-                      Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
+                        Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string)
                     Select Operator
                       expressions: a (type: int)
                       outputColumnNames: _col0
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out
index 98c24ce144c..edbb1b2523e 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out
@@ -1199,9 +1199,9 @@ STAGE PLANS:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 13 (XPROD_EDGE), Reducer 2 (XPROD_EDGE)
         Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 
5 (CONTAINS)
-        Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
-        Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Union 5 (CONTAINS)
-        Reducer 8 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 
(CONTAINS)
+        Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 
(CONTAINS)
+        Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+        Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 5 (CONTAINS)
         Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -1226,29 +1226,29 @@ STAGE PLANS:
                         value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 
(type: bigint)
                   Filter Operator
                     predicate: a is not null (type: boolean)
-                    Statistics: Num rows: 9 Data size: 873 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 9 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: a (type: int), b (type: string), c (type: 
int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), 
FILE__PATH (type: string)
+                      expressions: a (type: int), c (type: int), 
PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
-                      Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string)
+                        Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
                     Select Operator
-                      expressions: a (type: int), c (type: int), 
PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint)
+                      expressions: a (type: int), b (type: string), c (type: 
int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), 
FILE__PATH (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
-                      Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
+                        Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string)
                     Select Operator
                       expressions: a (type: int)
                       outputColumnNames: _col0
@@ -1454,6 +1454,28 @@ STAGE PLANS:
                         serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
                         name: default.tbl_ice
         Reducer 6 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: int), _col3 (type: bigint), _col4 
(type: string), _col5 (type: bigint), _col0 (type: int), 'Changed forever' 
(type: string), _col1 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
+                  Statistics: Num rows: 1 Data size: 311 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 913 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+                        output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+                        serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+                        name: default.tbl_ice
+        Reducer 7 
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
@@ -1470,7 +1492,7 @@ STAGE PLANS:
                   Map-reduce partition columns: _col5 (type: string)
                   Statistics: Num rows: 1 Data size: 293 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: int), _col3 (type: int), _col4 (type: bigint)
-        Reducer 7 
+        Reducer 8 
             Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
@@ -1511,28 +1533,6 @@ STAGE PLANS:
                             output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
                             serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
                             name: default.tbl_ice
-        Reducer 8 
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Select Operator
-                  expressions: _col2 (type: int), _col3 (type: bigint), _col4 
(type: string), _col5 (type: bigint), _col0 (type: int), 'Changed forever' 
(type: string), _col1 (type: int)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
-                  Statistics: Num rows: 1 Data size: 311 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 3 Data size: 913 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
-                        output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
-                        serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
-                        name: default.tbl_ice
         Reducer 9 
             Reduce Operator Tree:
               Merge Join Operator
@@ -1732,29 +1732,29 @@ STAGE PLANS:
                         value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 
(type: bigint)
                   Filter Operator
                     predicate: a is not null (type: boolean)
-                    Statistics: Num rows: 9 Data size: 873 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 9 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: a (type: int), b (type: string), c (type: 
int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), 
FILE__PATH (type: string)
+                      expressions: a (type: int), c (type: int), 
PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
-                      Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string)
+                        Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
                     Select Operator
-                      expressions: a (type: int), c (type: int), 
PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH 
(type: string), ROW__POSITION (type: bigint)
+                      expressions: a (type: int), b (type: string), c (type: 
int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), 
FILE__PATH (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
-                      Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 9 Data size: 1908 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
+                        Statistics: Num rows: 9 Data size: 2637 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string)
                     Select Operator
                       expressions: a (type: int)
                       outputColumnNames: _col0
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index 67437aed362..2f80bcfdb58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -649,8 +649,9 @@ public class SharedWorkOptimizer extends Transform {
   }
 
   private static boolean compatibleSchema(TableScanOperator tsOp1, 
TableScanOperator tsOp2) {
-    return tsOp1.getNeededColumns().equals(tsOp2.getNeededColumns())
-        && tsOp1.getNeededColumnIDs().equals(tsOp2.getNeededColumnIDs());
+    return Objects.equals(tsOp1.getNeededColumns(), tsOp2.getNeededColumns())
+        && Objects.equals(tsOp1.getNeededColumnIDs(), 
tsOp2.getNeededColumnIDs())
+        && Objects.equals(tsOp1.getConf().getVirtualCols(), 
tsOp2.getConf().getVirtualCols());
   }
 
 
diff --git 
a/ql/src/test/queries/clientpositive/sharedwork_virtualcol_schema_merge.q 
b/ql/src/test/queries/clientpositive/sharedwork_virtualcol_schema_merge.q
new file mode 100644
index 00000000000..c471201222a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sharedwork_virtualcol_schema_merge.q
@@ -0,0 +1,35 @@
+set hive.optimize.shared.work.merge.ts.schema=false;
+
+create table t1(a int);
+
+-- 3 map vertices scan table t1
+explain
+WITH t AS (
+  select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from (
+    select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a, row_number() 
OVER (partition by INPUT__FILE__NAME) rn from t1
+    where a = 1
+  ) q
+  where rn=1
+)
+select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from t1 where NOT (a 
= 1) AND INPUT__FILE__NAME IN (select INPUT__FILE__NAME from t)
+union all
+select * from t;
+
+
+
+set hive.optimize.shared.work.merge.ts.schema=true;
+
+-- 2 of 3 map vertices scanning table t1 are merged:
+-- One projects BLOCK__OFFSET__INSIDE__FILE and INPUT__FILE__NAME and the 
second one projects INPUT__FILE__NAME only.
+-- These are merged into one scan that projects both.
+explain
+WITH t AS (
+  select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from (
+    select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a, row_number() 
OVER (partition by INPUT__FILE__NAME) rn from t1
+    where a = 1
+  ) q
+  where rn=1
+)
+select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from t1 where NOT (a 
= 1) AND INPUT__FILE__NAME IN (select INPUT__FILE__NAME from t)
+union all
+select * from t;
diff --git 
a/ql/src/test/results/clientpositive/llap/sharedwork_virtualcol_schema_merge.q.out
 
b/ql/src/test/results/clientpositive/llap/sharedwork_virtualcol_schema_merge.q.out
new file mode 100644
index 00000000000..0f42feefbde
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/sharedwork_virtualcol_schema_merge.q.out
@@ -0,0 +1,432 @@
+PREHOOK: query: create table t1(a int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1(a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain
+WITH t AS (
+  select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from (
+    select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a, row_number() 
OVER (partition by INPUT__FILE__NAME) rn from t1
+    where a = 1
+  ) q
+  where rn=1
+)
+select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from t1 where NOT (a 
= 1) AND INPUT__FILE__NAME IN (select INPUT__FILE__NAME from t)
+union all
+select * from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+WITH t AS (
+  select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from (
+    select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a, row_number() 
OVER (partition by INPUT__FILE__NAME) rn from t1
+    where a = 1
+  ) q
+  where rn=1
+)
+select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from t1 where NOT (a 
= 1) AND INPUT__FILE__NAME IN (select INPUT__FILE__NAME from t)
+union all
+select * from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Union 3 
(CONTAINS)
+        Reducer 5 <- Map 4 (SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: ((a <> 1) and INPUT__FILE__NAME is not null) 
(type: boolean)
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: ((a <> 1) and INPUT__FILE__NAME is not null) 
(type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), BLOCK__OFFSET__INSIDE__FILE 
(type: bigint), INPUT__FILE__NAME (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: string)
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int), _col1 (type: 
bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: ((a = 1) and INPUT__FILE__NAME is not null) 
(type: boolean)
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: ((a = 1) and INPUT__FILE__NAME is not null) 
(type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: INPUT__FILE__NAME (type: string)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: INPUT__FILE__NAME (type: 
string)
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (a = 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (a = 1) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: INPUT__FILE__NAME (type: string)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: INPUT__FILE__NAME (type: 
string)
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: BLOCK__OFFSET__INSIDE__FILE (type: 
bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col2 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: bigint), _col2 (type: string), 
_col0 (type: int)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col2
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col2: string
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col2 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: row_number_window_0
+                              name: row_number
+                              window function: GenericUDAFRowNumberEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (row_number_window_0 = 1) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Select Operator
+                      expressions: _col2 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        minReductionHashAggr: 0.99
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          null sort order: z
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 7 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col1 (type: bigint), KEY.reducesinkkey0 
(type: string)
+                outputColumnNames: _col1, _col2
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col2 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: row_number_window_0
+                              name: row_number
+                              window function: GenericUDAFRowNumberEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (row_number_window_0 = 1) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Select Operator
+                      expressions: _col1 (type: bigint), _col2 (type: string), 
1 (type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Union 3 
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+WITH t AS (
+  select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from (
+    select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a, row_number() 
OVER (partition by INPUT__FILE__NAME) rn from t1
+    where a = 1
+  ) q
+  where rn=1
+)
+select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from t1 where NOT (a 
= 1) AND INPUT__FILE__NAME IN (select INPUT__FILE__NAME from t)
+union all
+select * from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+WITH t AS (
+  select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from (
+    select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a, row_number() 
OVER (partition by INPUT__FILE__NAME) rn from t1
+    where a = 1
+  ) q
+  where rn=1
+)
+select BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, a from t1 where NOT (a 
= 1) AND INPUT__FILE__NAME IN (select INPUT__FILE__NAME from t)
+union all
+select * from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 3 
(CONTAINS)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (((a <> 1) and INPUT__FILE__NAME is not null) or 
((a = 1) and INPUT__FILE__NAME is not null)) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: ((a <> 1) and INPUT__FILE__NAME is not null) 
(type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), BLOCK__OFFSET__INSIDE__FILE 
(type: bigint), INPUT__FILE__NAME (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: string)
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int), _col1 (type: 
bigint)
+                  Filter Operator
+                    predicate: ((a = 1) and INPUT__FILE__NAME is not null) 
(type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: INPUT__FILE__NAME (type: string)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: INPUT__FILE__NAME (type: 
string)
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (a = 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (a = 1) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: INPUT__FILE__NAME (type: string)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: INPUT__FILE__NAME (type: 
string)
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: BLOCK__OFFSET__INSIDE__FILE (type: 
bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col2 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: bigint), _col2 (type: string), 
_col0 (type: int)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col2
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col2: string
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col2 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: row_number_window_0
+                              name: row_number
+                              window function: GenericUDAFRowNumberEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (row_number_window_0 = 1) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Select Operator
+                      expressions: _col2 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        minReductionHashAggr: 0.99
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          null sort order: z
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 6 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col1 (type: bigint), KEY.reducesinkkey0 
(type: string)
+                outputColumnNames: _col1, _col2
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col2 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: row_number_window_0
+                              name: row_number
+                              window function: GenericUDAFRowNumberEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (row_number_window_0 = 1) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    Select Operator
+                      expressions: _col1 (type: bigint), _col2 (type: string), 
1 (type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Union 3 
+            Vertex: Union 3
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

Reply via email to