This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new be857bf83c0 HIVE-28126: Use added record count in cost model when rebuilding materialized view stored by iceberg (Krisztian Kasa, reviewed by Denys Kuzmenko, okumin)
be857bf83c0 is described below

commit be857bf83c00e41a315a7cc8e013832cf169ef18
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Tue Apr 9 14:58:52 2024 +0200

    HIVE-28126: Use added record count in cost model when rebuilding materialized view stored by iceberg (Krisztian Kasa, reviewed by Denys Kuzmenko, okumin)
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  70 ++-
 .../mr/hive/TestHiveIcebergStorageHandler.java     |  59 +++
 .../src/test/queries/positive/mv_iceberg_orc4.q    |  19 +-
 .../test/results/positive/mv_iceberg_orc4.q.out    | 572 ++++++++++++++++++++-
 .../test/results/positive/mv_iceberg_orc7.q.out    |   2 +-
 .../desc/formatter/TextDescTableFormatter.java     |  10 +-
 .../AlterMaterializedViewRebuildAnalyzer.java      |  41 +-
 .../org/apache/hadoop/hive/ql/metadata/Hive.java   |  53 +-
 .../hive/ql/metadata/HiveStorageHandler.java       |  26 +
 .../stats/HiveIncrementalRelMdRowCount.java        |  10 +
 .../llap/materialized_view_create_rewrite_7.q.out  |   2 +-
 .../materialized_view_create_rewrite_nulls.q.out   |   2 +-
 .../hadoop/hive/common/type/SnapshotContext.java   |  36 ++
 13 files changed, 845 insertions(+), 57 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 8aa833c4c18..369bc91fbcc 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -137,6 +137,7 @@ import org.apache.hadoop.mapred.Reporter;
 import org.apache.iceberg.BaseMetastoreTableOperations;
 import org.apache.iceberg.BaseTable;
 import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataOperations;
 import org.apache.iceberg.ExpireSnapshots;
 import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.FileScanTask;
@@ -1618,7 +1619,58 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     if (current == null) {
       return null;
     }
-    return new SnapshotContext(current.snapshotId());
+    return toSnapshotContext(current);
+  }
+
+  private SnapshotContext toSnapshotContext(Snapshot snapshot) {
+    Map<String, String> summaryMap = snapshot.summary();
+    long addedRecords = getLongSummary(summaryMap, SnapshotSummary.ADDED_RECORDS_PROP);
+    long deletedRecords = getLongSummary(summaryMap, SnapshotSummary.DELETED_RECORDS_PROP);
+    return new SnapshotContext(
+        snapshot.snapshotId(), toWriteOperationType(snapshot.operation()), addedRecords, deletedRecords);
+  }
+
+  private SnapshotContext.WriteOperationType toWriteOperationType(String operation) {
+    try {
+      return SnapshotContext.WriteOperationType.valueOf(operation.toUpperCase());
+    } catch (NullPointerException | IllegalArgumentException ex) {
+      return SnapshotContext.WriteOperationType.UNKNOWN;
+    }
+  }
+
+  private long getLongSummary(Map<String, String> summaryMap, String key) {
+    String textValue = summaryMap.get(key);
+    if (StringUtils.isBlank(textValue)) {
+      return 0;
+    }
+    return Long.parseLong(textValue);
+  }
+
+  @Override
+  public Iterable<SnapshotContext> getSnapshotContexts(
+      org.apache.hadoop.hive.ql.metadata.Table hmsTable, SnapshotContext since) {
+
+    TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
+    Table table = IcebergTableUtil.getTable(conf, tableDesc.getProperties());
+    return getSnapshots(table.snapshots(), since);
+  }
+
+  @VisibleForTesting
+  Iterable<SnapshotContext> getSnapshots(Iterable<Snapshot> snapshots, SnapshotContext since) {
+    List<SnapshotContext> result = Lists.newArrayList();
+
+    boolean foundSince = Objects.isNull(since);
+    for (Snapshot snapshot : snapshots) {
+      if (!foundSince) {
+        if (snapshot.snapshotId() == since.getSnapshotId()) {
+          foundSince = true;
+        }
+      } else {
+        result.add(toSnapshotContext(snapshot));
+      }
+    }
+
+    return foundSince ? result : Collections.emptyList();
   }
 
   @Override
@@ -1692,6 +1744,20 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     }
   }
 
+  /**
+   * Check the operation type of all snapshots which are newer than the specified. The specified snapshot is excluded.
+   * @deprecated
+   * <br>Use {@link HiveStorageHandler#getSnapshotContexts(
+   * org.apache.hadoop.hive.ql.metadata.Table hmsTable, SnapshotContext since)}
+   * and check {@link SnapshotContext.WriteOperationType#APPEND}.equals({@link SnapshotContext#getOperation()}).
+   *
+   * @param hmsTable table metadata stored in Hive Metastore
+   * @param since the snapshot preceding the oldest snapshot which should be checked.
+   *              The value null means all should be checked.
+   * @return null if table is empty, true if all snapshots are {@link SnapshotContext.WriteOperationType#APPEND}s,
+   * false otherwise.
+   */
+  @Deprecated
   @Override
   public Boolean hasAppendsOnly(org.apache.hadoop.hive.ql.metadata.Table hmsTable, SnapshotContext since) {
     TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
@@ -1708,7 +1774,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
           foundSince = true;
         }
       } else {
-        if (!"append".equals(snapshot.operation())) {
+        if (!DataOperations.APPEND.equals(snapshot.operation())) {
           return false;
         }
       }
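
The deprecated hasAppendsOnly check above can be expressed on top of the new
getSnapshotContexts API. The following is only an illustrative sketch of a
possible caller, not code from this commit; the helper name isAppendsOnly is
hypothetical:

    // Sketch only: derives the old hasAppendsOnly answer from getSnapshotContexts.
    Boolean isAppendsOnly(HiveStorageHandler handler,
                          org.apache.hadoop.hive.ql.metadata.Table hmsTable,
                          SnapshotContext since) {
      Boolean result = null;  // stays null when there are no snapshots newer than 'since'
      for (SnapshotContext snapshot : handler.getSnapshotContexts(hmsTable, since)) {
        if (!SnapshotContext.WriteOperationType.APPEND.equals(snapshot.getOperation())) {
          return false;  // any non-append snapshot rules out insert-only incremental rebuild
        }
        result = true;
      }
      return result;
    }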
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandler.java
index 92697a4649d..1a2eddcf5f1 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandler.java
@@ -22,8 +22,12 @@ package org.apache.iceberg.mr.hive;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
+import org.apache.commons.collections4.IterableUtils;
 import org.apache.hadoop.hive.common.type.SnapshotContext;
 import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SnapshotSummary;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -51,10 +55,18 @@ public class TestHiveIcebergStorageHandler {
   @Before
   public void before() {
     when(anySnapshot.snapshotId()).thenReturn(42L);
+
     Mockito.lenient().when(appendSnapshot.snapshotId()).thenReturn(20L);
+    Map<String, String> summary = Maps.newHashMap();
+    summary.put(SnapshotSummary.ADDED_RECORDS_PROP, "12");
+    Mockito.lenient().when(appendSnapshot.summary()).thenReturn(summary);
     when(appendSnapshot.operation()).thenReturn("append");
+
     Mockito.lenient().when(deleteSnapshot.snapshotId()).thenReturn(100L);
     when(deleteSnapshot.operation()).thenReturn("delete");
+    summary = Maps.newHashMap();
+    summary.put(SnapshotSummary.DELETED_RECORDS_PROP, "3");
+    Mockito.lenient().when(deleteSnapshot.summary()).thenReturn(summary);
   }
 
   @Test
@@ -123,4 +135,51 @@ public class TestHiveIcebergStorageHandler {
 
     assertThat(result, is(nullValue()));
   }
+
+  @Test
+  public void testGetSnapshotContextsReturnsEmptyIterableWhenTableIsEmpty() {
+    SnapshotContext since = new SnapshotContext(42);
+
+    HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
+    Iterable<SnapshotContext> result = storageHandler.getSnapshots(Collections.emptyList(), since);
+
+    assertThat(result.iterator().hasNext(), is(false));
+  }
+
+  @Test
+  public void testGetSnapshotContextsReturnsEmptyIterableWhenTableIsEmptyAndGivenSnapShotIsNull() {
+    HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
+    Iterable<SnapshotContext> result = storageHandler.getSnapshots(Collections.emptyList(), null);
+
+    assertThat(result.iterator().hasNext(), is(false));
+  }
+
+  @Test
+  public void testGetSnapshotContextsReturnsAllSnapshotsWhenGivenSnapshotIsNull() {
+    HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
+    Iterable<SnapshotContext> result = storageHandler.getSnapshots(asList(appendSnapshot, deleteSnapshot), null);
+
+    List<SnapshotContext> resultList = IterableUtils.toList(result);
+    assertThat(resultList.size(), is(2));
+    assertThat(resultList.get(0).getSnapshotId(), is(appendSnapshot.snapshotId()));
+    assertThat(resultList.get(0).getOperation(), is(SnapshotContext.WriteOperationType.APPEND));
+    assertThat(resultList.get(0).getAddedRowCount(), is(12L));
+    assertThat(resultList.get(0).getDeletedRowCount(), is(0L));
+
+    assertThat(resultList.get(1).getSnapshotId(), is(deleteSnapshot.snapshotId()));
+    assertThat(resultList.get(1).getOperation(), is(SnapshotContext.WriteOperationType.DELETE));
+    assertThat(resultList.get(1).getAddedRowCount(), is(0L));
+    assertThat(resultList.get(1).getDeletedRowCount(), is(3L));
+  }
+
+  @Test
+  public void testGetSnapshotContextsReturnsEmptyIterableWhenGivenSnapshotNotInTheList() {
+    SnapshotContext since = new SnapshotContext(1);
+    List<Snapshot> snapshotList = Arrays.asList(anySnapshot, appendSnapshot, deleteSnapshot);
+
+    HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
+    Iterable<SnapshotContext> result = storageHandler.getSnapshots(snapshotList, since);
+
+    assertThat(result.iterator().hasNext(), is(false));
+  }
 }
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc4.q b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc4.q
index 494ea821a71..aea903996b1 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc4.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc4.q
@@ -1,7 +1,9 @@
 -- MV source tables are iceberg and MV has aggregate
 -- SORT_QUERY_RESULTS
 --! qt:replace:/(.*fromVersion=\[)\S+(\].*)/$1#Masked#$2/
+--! qt:replace:/(\s+Version\sinterval\sfrom\:\s+)\d+(\s*)/$1#Masked#/
 
+set hive.explain.user=false;
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
 
@@ -13,7 +15,7 @@ create external table tbl_ice_v2(d int, e string, f int) stored by iceberg store
 insert into tbl_ice values (1, 'one', 50), (4, 'four', 53), (5, 'five', 54);
 insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five v2', 54);
 
-create materialized view mat1 as
+create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as
 select tbl_ice.b, tbl_ice.c, sum(tbl_ice_v2.f)
 from tbl_ice
 join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
@@ -23,8 +25,23 @@ group by tbl_ice.b, tbl_ice.c;
 insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54);
 insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five v2', 54);
 
+-- Incremental rebuild plan is more expensive than full rebuild
+set hive.materializedview.rebuild.incremental.factor=10.0;
+
 explain cbo
 alter materialized view mat1 rebuild;
+explain
+alter materialized view mat1 rebuild;
+
+
+-- Incremental rebuild plan is cheaper than full rebuild
+set hive.materializedview.rebuild.incremental.factor=0.01;
+
+explain cbo
+alter materialized view mat1 rebuild;
+explain
+alter materialized view mat1 rebuild;
+
 alter materialized view mat1 rebuild;
 
 select * from mat1;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc4.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc4.q.out
index 0f0583e52d4..0f52fa18f33 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc4.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc4.q.out
@@ -36,7 +36,7 @@ POSTHOOK: query: insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2',
 POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@tbl_ice_v2
-PREHOOK: query: create materialized view mat1 as
+PREHOOK: query: create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as
 select tbl_ice.b, tbl_ice.c, sum(tbl_ice_v2.f)
 from tbl_ice
 join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
@@ -46,7 +46,8 @@ PREHOOK: Input: default@tbl_ice
 PREHOOK: Input: default@tbl_ice_v2
 PREHOOK: Output: database:default
 PREHOOK: Output: default@mat1
-POSTHOOK: query: create materialized view mat1 as
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as
 select tbl_ice.b, tbl_ice.c, sum(tbl_ice_v2.f)
 from tbl_ice
 join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
@@ -56,6 +57,7 @@ POSTHOOK: Input: default@tbl_ice
 POSTHOOK: Input: default@tbl_ice_v2
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@mat1
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: Lineage: mat1._c2 EXPRESSION [(tbl_ice_v2)tbl_ice_v2.FieldSchema(name:f, type:int, comment:null), ]
 POSTHOOK: Lineage: mat1.b SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), ]
 POSTHOOK: Lineage: mat1.c SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ]
@@ -105,21 +107,581 @@ HiveAggregate(group=[{0, 1}], agg#0=[sum($2)])
       HiveProject(b=[$0], c=[$1], _c2=[$2])
         HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
 
+PREHOOK: query: explain
+alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Input: default@tbl_ice_v2
+PREHOOK: Output: default@mat1
+POSTHOOK: query: explain
+alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Input: default@tbl_ice_v2
+POSTHOOK: Output: default@mat1
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 8 <- Union 4 (CONTAINS)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 5 <- Union 4 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice
+                  filterExpr: ((c > 52) and a is not null) (type: boolean)
+                  Statistics: Num rows: 8 Data size: 768 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Version interval from: #Masked#
+                  Filter Operator
+                    predicate: ((c > 52) and a is not null) (type: boolean)
+                    Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: a (type: int), b (type: string), c (type: 
int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string), _col2 (type: 
int)
+            Execution mode: vectorized
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice_v2
+                  filterExpr: d is not null (type: boolean)
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Version interval from: #Masked#
+                  Filter Operator
+                    predicate: d is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: d (type: int), f (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int)
+            Execution mode: vectorized
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: default.mat1
+                  Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: b (type: string), c (type: int), _c2 (type: 
bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 2 Data size: 200 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col2)
+                      keys: _col0 (type: string), _col1 (type: int)
+                      minReductionHashAggr: 0.4
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
int)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
+                        Statistics: Num rows: 4 Data size: 400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col2, _col4
+                Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col4)
+                  keys: _col1 (type: string), _col2 (type: int)
+                  minReductionHashAggr: 0.4
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: int)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
+                    Statistics: Num rows: 4 Data size: 400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col2 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col2)
+                  keys: _col0 (type: string), _col1 (type: int)
+                  minReductionHashAggr: 0.4
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: int)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
+                    Statistics: Num rows: 4 Data size: 400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col2 (type: bigint)
+        Reducer 5 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+                      output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+                      serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+                      name: default.mat1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: int), _col2 
(type: bigint)
+                  outputColumnNames: b, c, _c2
+                  Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: max(length(b)), avg(COALESCE(length(b),0)), 
count(1), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), 
compute_bit_vector_hll(c), min(_c2), max(_c2), count(_c2), 
compute_bit_vector_hll(_c2)
+                    minReductionHashAggr: 0.75
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                    Statistics: Num rows: 1 Data size: 568 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      null sort order: 
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 568 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: int), _col1 (type: 
struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: 
bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 
(type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: 
bigint), _col11 (type: bigint), _col12 (type: binary)
+        Reducer 6 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0), avg(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), 
min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), 
compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), 
count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+                Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: 'STRING' (type: string), 
UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), 
(_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) 
(type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) 
(type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: 
bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: 
binary), 'LONG' (type: string), _col9 (type: bigint), [...]
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17
+                  Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 794 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Union 4 
+            Vertex: Union 4
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+              output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+              serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+              name: default.mat1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: b, c, _c2
+          Column Types: string, int, bigint
+          Table: default.mat1
+
+  Stage: Stage-4
+    Materialized View Update
+      name: default.mat1
+      update creation metadata: true
+
+PREHOOK: query: explain cbo
+alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Input: default@tbl_ice_v2
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+POSTHOOK: query: explain cbo
+alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Input: default@tbl_ice_v2
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+CBO PLAN:
+HiveProject($f0=[$4], $f1=[$5], $f2=[CASE(IS NULL($2), $6, IS NULL($6), $2, +($6, $2))])
+  HiveFilter(condition=[OR($3, IS NULL($3))])
+    HiveJoin(condition=[AND(IS NOT DISTINCT FROM($0, $4), IS NOT DISTINCT FROM($1, $5))], joinType=[right], algorithm=[BucketJoin], cost=[not available])
+      HiveProject(b=[$0], c=[$1], _c2=[$2], $f3=[true])
+        HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+      HiveProject(b=[$0], c=[$1], $f2=[$2])
+        HiveAggregate(group=[{1, 2}], agg#0=[sum($4)])
+          HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+            HiveProject(a=[$0], b=[$1], c=[$2])
+              HiveFilter(condition=[AND(>($2, 52), IS NOT NULL($0))])
+                HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice], fromVersion=[#Masked#])
+            HiveProject(d=[$0], f=[$2])
+              HiveFilter(condition=[IS NOT NULL($0)])
+                HiveTableScan(table=[[default, tbl_ice_v2]], table:alias=[tbl_ice_v2], fromVersion=[#Masked#])
+
+PREHOOK: query: explain
+alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Input: default@tbl_ice_v2
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+POSTHOOK: query: explain
+alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Input: default@tbl_ice_v2
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+  Stage: Stage-3
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+        Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: default.mat1
+                  Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: b (type: string), c (type: int), _c2 (type: 
bigint), true (type: boolean), PARTITION__SPEC__ID (type: int), PARTITION__HASH 
(type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7
+                    Statistics: Num rows: 2 Data size: 616 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: int)
+                      null sort order: zz
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
+                      Statistics: Num rows: 2 Data size: 616 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col2 (type: bigint), _col3 (type: 
boolean), _col4 (type: int), _col5 (type: bigint), _col6 (type: string), _col7 
(type: bigint)
+            Execution mode: vectorized
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice
+                  filterExpr: ((c > 52) and a is not null) (type: boolean)
+                  Statistics: Num rows: 8 Data size: 768 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Version interval from: #Masked#
+                  Filter Operator
+                    predicate: ((c > 52) and a is not null) (type: boolean)
+                    Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: a (type: int), b (type: string), c (type: 
int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string), _col2 (type: 
int)
+            Execution mode: vectorized
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice_v2
+                  filterExpr: d is not null (type: boolean)
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Version interval from: #Masked#
+                  Filter Operator
+                    predicate: d is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: d (type: int), f (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Right Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string), _col1 (type: int)
+                  1 _col0 (type: string), _col1 (type: int)
+                nullSafes: [true, true]
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
+                Statistics: Num rows: 6 Data size: 1524 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Filter Operator
+                  predicate: _col3 (type: boolean)
+                  Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col4 (type: int), _col5 (type: bigint), 
_col6 (type: string), _col7 (type: bigint), _col0 (type: string), _col1 (type: 
int), _col2 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6
+                    Statistics: Num rows: 1 Data size: 304 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: 
bigint), _col2 (type: string), _col3 (type: bigint)
+                      null sort order: aaaa
+                      sort order: ++++
+                      Statistics: Num rows: 1 Data size: 304 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col4 (type: string), _col5 (type: 
int), _col6 (type: bigint)
+                Filter Operator
+                  predicate: _col3 (type: boolean)
+                  Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col8 (type: string), _col9 (type: int), CASE 
WHEN (_col2 is null) THEN (_col10) WHEN (_col10 is null) THEN (_col2) ELSE 
((_col10 + _col2)) END (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 100 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 100 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+                          output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+                          serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+                          name: default.mat1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: bigint)
+                      outputColumnNames: b, c, _c2
+                      Statistics: Num rows: 1 Data size: 100 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: max(length(b)), 
avg(COALESCE(length(b),0)), count(1), count(b), compute_bit_vector_hll(b), 
min(c), max(c), count(c), compute_bit_vector_hll(c), min(_c2), max(_c2), 
count(_c2), compute_bit_vector_hll(_c2)
+                        minReductionHashAggr: 0.4
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                        Statistics: Num rows: 1 Data size: 568 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          null sort order: 
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 568 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: int), _col1 (type: 
struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: 
bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 
(type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: 
bigint), _col11 (type: bigint), _col12 (type: binary)
+                Filter Operator
+                  predicate: _col3 is null (type: boolean)
+                  Statistics: Num rows: 4 Data size: 1016 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col8 (type: string), _col9 (type: int), CASE 
WHEN (_col2 is null) THEN (_col10) WHEN (_col10 is null) THEN (_col2) ELSE 
((_col10 + _col2)) END (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+                          output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+                          serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+                          name: default.mat1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: bigint)
+                      outputColumnNames: b, c, _c2
+                      Statistics: Num rows: 4 Data size: 384 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: max(length(b)), 
avg(COALESCE(length(b),0)), count(1), count(b), compute_bit_vector_hll(b), 
min(c), max(c), count(c), compute_bit_vector_hll(c), min(_c2), max(_c2), 
count(_c2), compute_bit_vector_hll(_c2)
+                        minReductionHashAggr: 0.75
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                        Statistics: Num rows: 1 Data size: 568 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          null sort order: 
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 568 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: int), _col1 (type: 
struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: 
bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 
(type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: 
bigint), _col11 (type: bigint), _col12 (type: binary)
+        Reducer 3 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), 
KEY.reducesinkkey3 (type: bigint), VALUE._col0 (type: string), VALUE._col1 
(type: int), VALUE._col2 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
+                Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+                      output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+                      serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+                      name: default.mat1
+        Reducer 4 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0), avg(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), 
min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), 
compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), 
count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+                Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: 'STRING' (type: string), 
UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), 
(_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) 
(type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) 
(type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: 
bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: 
binary), 'LONG' (type: string), _col9 (type: bigint), [...]
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17
+                  Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 794 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0), avg(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), 
min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), 
compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), 
count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+                Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: 'STRING' (type: string), 
UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), 
(_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) 
(type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) 
(type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: 
bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: 
binary), 'LONG' (type: string), _col9 (type: bigint), [...]
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17
+                  Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 794 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col2, _col4
+                Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col4)
+                  keys: _col1 (type: string), _col2 (type: int)
+                  minReductionHashAggr: 0.4
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: int)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
+                    Statistics: Num rows: 4 Data size: 400 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col2 (type: bigint)
+        Reducer 8 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int)
+                  null sort order: zz
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
+                  Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  value expressions: _col2 (type: bigint)
+
+  Stage: Stage-4
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+              output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+              serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+              name: default.mat1
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: b, c, _c2
+          Column Types: string, int, bigint
+          Table: default.mat1
+
+  Stage: Stage-6
+    Materialized View Update
+      name: default.mat1
+      update creation metadata: true
+
 PREHOOK: query: alter materialized view mat1 rebuild
 PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
 PREHOOK: Input: default@mat1
 PREHOOK: Input: default@tbl_ice
 PREHOOK: Input: default@tbl_ice_v2
 PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
 POSTHOOK: query: alter materialized view mat1 rebuild
 POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
 POSTHOOK: Input: default@mat1
 POSTHOOK: Input: default@tbl_ice
 POSTHOOK: Input: default@tbl_ice_v2
 POSTHOOK: Output: default@mat1
-POSTHOOK: Lineage: mat1._c2 EXPRESSION [(tbl_ice_v2)tbl_ice_v2.FieldSchema(name:f, type:int, comment:null), (mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), ]
-POSTHOOK: Lineage: mat1.b EXPRESSION [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), (mat1)default.mat1.FieldSchema(name:b, type:string, comment:null), ]
-POSTHOOK: Lineage: mat1.c EXPRESSION [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), (mat1)default.mat1.FieldSchema(name:c, type:int, comment:null), ]
+POSTHOOK: Output: default@mat1
 PREHOOK: query: select * from mat1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@mat1
diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc7.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc7.q.out
index dbbdd39e35e..93e56b61b96 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc7.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc7.q.out
@@ -76,7 +76,7 @@ POSTHOOK: Output: default@mat1
 CBO PLAN:
 HiveProject($f0=[$3], $f1=[CASE(IS NULL($1), $4, IS NULL($4), $1, +($4, $1))])
   HiveFilter(condition=[OR($2, IS NULL($2))])
-    HiveJoin(condition=[IS NOT DISTINCT FROM($0, $3)], joinType=[right], algorithm=[none], cost=[not available])
+    HiveJoin(condition=[IS NOT DISTINCT FROM($0, $3)], joinType=[right], algorithm=[BucketJoin], cost=[not available])
       HiveProject(a=[$0], _c1=[$1], $f2=[true])
         HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
       HiveProject(a=[$0], $f1=[$1])
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java
index badd90953ad..83dd37bd70e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.TableName;
 import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
+import org.apache.hadoop.hive.common.type.SnapshotContext;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.TableType;
@@ -66,7 +67,6 @@ import java.util.Comparator;
 import java.util.Date;
 import java.util.List;
 import java.util.Map;
-import java.util.Objects;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.Map.Entry;
@@ -284,7 +284,13 @@ class TextDescTableFormatter extends DescTableFormatter {
   private static MaterializationSnapshotFormatter createMaterializationSnapshotFormatter(
           MaterializationSnapshot snapshot) {
     if (snapshot != null && snapshot.getTableSnapshots() != null && !snapshot.getTableSnapshots().isEmpty()) {
-      return qualifiedTableName -> Objects.toString(snapshot.getTableSnapshots().get(qualifiedTableName), "Unknown");
+      return qualifiedTableName -> {
+        SnapshotContext snapshotContext = snapshot.getTableSnapshots().get(qualifiedTableName);
+        if (snapshotContext == null) {
+          return "Unknown";
+        }
+        return String.format("SnapshotContext{snapshotId=%d}", snapshotContext.getSnapshotId());
+      };
     } else if (snapshot != null && snapshot.getValidTxnList() != null) {
       ValidTxnWriteIdList validReaderWriteIdList = new ValidTxnWriteIdList(snapshot.getValidTxnList());
       return qualifiedTableName -> {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
index 2c1f148d303..3c6d973b1a7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
@@ -57,7 +57,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentMateri
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentSnapshotMaterializationRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveInsertOnlyScanWriteIdRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveJoinInsertIncrementalRewritingRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializationRelMetadataProvider;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HivePushdownSnapshotFilterRule;
@@ -360,13 +359,15 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
           }
 
           if (visitor.isContainsAggregate()) {
-            return applyAggregateInsertIncremental(basePlan, mdProvider, executorProvider, optCluster, calcitePreMVRewritingPlan);
+            return applyAggregateInsertIncremental(
+                basePlan, mdProvider, executorProvider, optCluster, materialization, calcitePreMVRewritingPlan);
           } else {
             return applyJoinInsertIncremental(basePlan, mdProvider, executorProvider);
           }
         case AVAILABLE:
           if (!materialization.isSourceTablesUpdateDeleteModified()) {
-            return applyAggregateInsertIncremental(basePlan, mdProvider, executorProvider, optCluster, calcitePreMVRewritingPlan);
+            return applyAggregateInsertIncremental(
+                basePlan, mdProvider, executorProvider, optCluster, materialization, calcitePreMVRewritingPlan);
           } else {
             return applyAggregateInsertDeleteIncremental(basePlan, mdProvider, executorProvider);
           }
@@ -391,30 +392,28 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
 
     private RelNode applyAggregateInsertIncremental(
             RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider, RelOptCluster optCluster,
-            RelNode calcitePreMVRewritingPlan) {
+            HiveRelOptMaterialization materialization, RelNode calcitePreMVRewritingPlan) {
       mvRebuildMode = MaterializationRebuildMode.AGGREGATE_INSERT_REBUILD;
-      basePlan = applyIncrementalRebuild(basePlan, mdProvider, executorProvider,
+      RelNode incrementalRebuildPlan = applyIncrementalRebuild(basePlan, mdProvider, executorProvider,
              HiveInsertOnlyScanWriteIdRule.INSTANCE, HiveAggregateInsertIncrementalRewritingRule.INSTANCE);
 
       // Make a cost-based decision factoring the configuration property
-      optCluster.invalidateMetadataQuery();
-      RelMetadataQuery.THREAD_PROVIDERS.set(HiveMaterializationRelMetadataProvider.DEFAULT);
-      try {
-        RelMetadataQuery mq = RelMetadataQuery.instance();
-        RelOptCost costOriginalPlan = mq.getCumulativeCost(calcitePreMVRewritingPlan);
-        final double factorSelectivity = HiveConf.getFloatVar(
-                conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL_FACTOR);
-        RelOptCost costRebuildPlan = mq.getCumulativeCost(basePlan).multiplyBy(factorSelectivity);
-        if (costOriginalPlan.isLe(costRebuildPlan)) {
-          mvRebuildMode = MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD;
-          return calcitePreMVRewritingPlan;
-        }
+      RelOptCost costOriginalPlan = calculateCost(
+          optCluster, mdProvider, HiveTezModelRelMetadataProvider.DEFAULT, calcitePreMVRewritingPlan);
 
-        return basePlan;
-      } finally {
-        optCluster.invalidateMetadataQuery();
-        RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider));
+      RelOptCost costIncrementalRebuildPlan = calculateCost(optCluster, mdProvider,
+          HiveIncrementalRelMdRowCount.createMetadataProvider(materialization), incrementalRebuildPlan);
+
+      final double factorSelectivity = HiveConf.getFloatVar(
+          conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL_FACTOR);
+      costIncrementalRebuildPlan = costIncrementalRebuildPlan.multiplyBy(factorSelectivity);
+
+      if (costOriginalPlan.isLe(costIncrementalRebuildPlan)) {
+        mvRebuildMode = MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD;
+        return calcitePreMVRewritingPlan;
       }
+
+      return incrementalRebuildPlan;
     }
 
     private RelNode applyJoinInsertIncremental(
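The hunk above replaces the THREAD_PROVIDERS bookkeeping with two calculateCost calls: the pre-rewrite plan is costed with the regular Tez metadata provider, while the incremental plan is costed with HiveIncrementalRelMdRowCount, whose row counts reflect only the records added since the last rebuild. A minimal sketch of the resulting decision, using plain doubles instead of Calcite's RelOptCost (the class name and the sample numbers below are illustrative, not part of the commit):

    // Sketch only: mirrors the shape of the cost-based choice, not the Calcite implementation.
    public class RebuildCostChoiceSketch {
      enum RebuildMode { INSERT_OVERWRITE_REBUILD, AGGREGATE_INSERT_REBUILD }

      static RebuildMode choose(double costFullRebuild, double costIncrementalRebuild, double factorSelectivity) {
        // The incremental plan is scaled by HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL_FACTOR;
        // if the full rebuild is still cheaper or equal, fall back to INSERT OVERWRITE.
        double scaledIncremental = costIncrementalRebuild * factorSelectivity;
        return costFullRebuild <= scaledIncremental
            ? RebuildMode.INSERT_OVERWRITE_REBUILD
            : RebuildMode.AGGREGATE_INSERT_REBUILD;
      }

      public static void main(String[] args) {
        // Hypothetical numbers: a full rebuild reads 1,000,000 rows while only 10,000 rows were appended.
        System.out.println(choose(1_000_000d, 10_000d, 1.05d));  // AGGREGATE_INSERT_REBUILD
      }
    }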
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index f44fb8f7f68..3ee27912e9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -107,6 +107,7 @@ import org.apache.hadoop.hive.common.DataCopyStatistics;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
 import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.hive.common.log.InPlaceUpdate;
+import org.apache.hadoop.hive.common.type.SnapshotContext;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -2156,38 +2157,44 @@ public class Hive {
         // Mixing native and non-native acid source tables are not supported. If the first source is native acid
         // the rest is expected to be native acid
         return getMSC().getMaterializationInvalidationInfo(
-                metadata.creationMetadata, conf.get(ValidTxnList.VALID_TXNS_KEY));
+            metadata.creationMetadata, conf.get(ValidTxnList.VALID_TXNS_KEY));
       }
     }
 
-    MaterializationSnapshot mvSnapshot = MaterializationSnapshot.fromJson(metadata.creationMetadata.getValidTxnList());
+    boolean allHasAppendsOnly = allTablesHasAppendsOnly(metadata);
+    Materialization materialization = new Materialization();
+    // TODO: delete operations are not supported yet.
+    // Set setSourceTablesCompacted to false when delete is supported
+    materialization.setSourceTablesCompacted(!allHasAppendsOnly);
+    materialization.setSourceTablesUpdateDeleteModified(!allHasAppendsOnly);
+    return materialization;
+  }
 
-    boolean hasAppendsOnly = true;
+  private boolean allTablesHasAppendsOnly(MaterializedViewMetadata metadata) throws HiveException {
+    MaterializationSnapshot mvSnapshot = MaterializationSnapshot.fromJson(metadata.creationMetadata.getValidTxnList());
     for (SourceTable sourceTable : metadata.getSourceTables()) {
       Table table = getTable(sourceTable.getTable().getDbName(), sourceTable.getTable().getTableName());
       HiveStorageHandler storageHandler = table.getStorageHandler();
-      if (storageHandler == null) {
-        Materialization materialization = new Materialization();
-        materialization.setSourceTablesCompacted(true);
-        return materialization;
-      }
-      Boolean b = storageHandler.hasAppendsOnly(
-          table, mvSnapshot.getTableSnapshots().get(table.getFullyQualifiedName()));
-      if (b == null) {
-        Materialization materialization = new Materialization();
-        materialization.setSourceTablesCompacted(true);
-        return materialization;
-      } else if (!b) {
-        hasAppendsOnly = false;
-        break;
+      // Currently, mixing native and non-native source tables is not supported.
+      if (!(storageHandler != null &&
+          storageHandler.areSnapshotsSupported() &&
+          tableHasAppendsOnly(storageHandler, table, mvSnapshot))) {
+        return false;
       }
     }
-    Materialization materialization = new Materialization();
-    // TODO: delete operations are not supported yet.
-    // Set setSourceTablesCompacted to false when delete is supported
-    materialization.setSourceTablesCompacted(!hasAppendsOnly);
-    materialization.setSourceTablesUpdateDeleteModified(!hasAppendsOnly);
-    return materialization;
+
+    return true;
+  }
+
+  private boolean tableHasAppendsOnly(
+      HiveStorageHandler storageHandler, Table table, MaterializationSnapshot mvSnapshot) {
+    for (SnapshotContext snapshot : storageHandler.getSnapshotContexts(
+        table, mvSnapshot.getTableSnapshots().get(table.getFullyQualifiedName()))) {
+      if (!SnapshotContext.WriteOperationType.APPEND.equals(snapshot.getOperation())) {
+        return false;
+      }
+    }
+    return true;
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index a2c476c9ad5..87f0afe50b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -689,6 +689,20 @@ public interface HiveStorageHandler extends Configurable {
     return null;
   }
 
+  /**
+   * Returns snapshot metadata of table snapshots which are newer than the specified one.
+   * The specified snapshot is excluded.
+   * @param hmsTable table metadata stored in Hive Metastore
+   * @param since the snapshot preceding the oldest snapshot which should be checked.
+   *              A null value means all snapshots should be checked.
+   * @return Iterable of {@link SnapshotContext}.
+   */
+  default Iterable<SnapshotContext> getSnapshotContexts(
+      org.apache.hadoop.hive.ql.metadata.Table hmsTable, SnapshotContext since) {
+    return Collections.emptyList();
+  }
+
+
   /**
    * Alter table operations can rely on this to customize the EnvironmentContext to be used during the alter table
    * invocation (both on client and server side of HMS)
@@ -699,6 +713,18 @@ public interface HiveStorageHandler extends Configurable {
       EnvironmentContext environmentContext) {
   }
 
+  /**
+   * Checks the operation type of all snapshots which are newer than the specified one. The specified snapshot is excluded.
+   * @deprecated
+   * <br>Use {@link HiveStorageHandler#getSnapshotContexts(org.apache.hadoop.hive.ql.metadata.Table hmsTable, SnapshotContext since)}
+   * and check {@link SnapshotContext.WriteOperationType#APPEND}.equals({@link SnapshotContext#getOperation()}).
+   *
+   * @param hmsTable table metadata stored in Hive Metastore
+   * @param since the snapshot preceding the oldest snapshot which should be checked.
+   *              A null value means all snapshots should be checked.
+   * @return null if the table is empty, true if all snapshots are {@link SnapshotContext.WriteOperationType#APPEND}s, false otherwise.
+   */
+  @Deprecated
   default Boolean hasAppendsOnly(org.apache.hadoop.hive.ql.metadata.Table hmsTable, SnapshotContext since) {
     return null;
   }
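The new getSnapshotContexts default returns an empty Iterable, so only snapshot-capable handlers (those for which areSnapshotsSupported() returns true) need to override it. A hedged sketch of an override for a handler that keeps its snapshot history as an ordered in-memory list; the SnapshotLogSketch class and its snapshotLog field are assumptions made for illustration, not the Iceberg implementation:

    import java.util.Collections;
    import java.util.List;
    import org.apache.hadoop.hive.common.type.SnapshotContext;

    // Sketch only: demonstrates the contract "snapshots newer than 'since', exclusive; null means all".
    public class SnapshotLogSketch {
      private final List<SnapshotContext> snapshotLog;  // assumed to be in commit order

      public SnapshotLogSketch(List<SnapshotContext> snapshotLog) {
        this.snapshotLog = snapshotLog;
      }

      public Iterable<SnapshotContext> getSnapshotContexts(SnapshotContext since) {
        if (since == null) {
          return snapshotLog;                      // null: every snapshot should be checked
        }
        int idx = snapshotLog.indexOf(since);      // SnapshotContext identity is based on snapshotId
        if (idx < 0) {
          return Collections.emptyList();          // 'since' unknown in this sketch: nothing newer to report
        }
        return snapshotLog.subList(idx + 1, snapshotLog.size());  // exclude 'since' itself
      }
    }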
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java
index bc2427eefcd..9527431aaf7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveIncrementalRelMdRowCount.java
@@ -27,8 +27,10 @@ import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.util.BuiltInMethod;
 import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.common.type.SnapshotContext;
 import org.apache.hadoop.hive.metastore.api.SourceTable;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.MaterializedViewMetadata;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -36,6 +38,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.stream.StreamSupport;
 
 public class HiveIncrementalRelMdRowCount extends HiveRelMdRowCount {
 
@@ -84,6 +87,13 @@ public class HiveIncrementalRelMdRowCount extends HiveRelMdRowCount {
       return super.getRowCount(rel, mq);
     }
 
+    HiveStorageHandler storageHandler = table.getStorageHandler();
+    if (storageHandler != null && storageHandler.areSnapshotsSupported()) {
+      SnapshotContext since = new SnapshotContext(Long.parseLong(table.getVersionIntervalFrom()));
+      return StreamSupport.stream(storageHandler.getSnapshotContexts(table, since).spliterator(), false)
+          .mapToDouble(SnapshotContext::getAddedRowCount).sum();
+    }
+
     return (double) sourceTable.getInsertedCount();
   }
 }
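With this change the incremental row count of a snapshot-capable source table is no longer read from SourceTable.getInsertedCount(); it is the sum of addedRowCount over the snapshots committed after the one recorded for the materialized view. A small standalone sketch of that aggregation (the class and method names are illustrative):

    import java.util.stream.StreamSupport;
    import org.apache.hadoop.hive.common.type.SnapshotContext;

    public final class AddedRowCountSketch {
      // Sketch only: sums the rows appended by every snapshot newer than the MV's recorded snapshot,
      // matching what HiveIncrementalRelMdRowCount now feeds into the cost model.
      static double addedRowsSince(Iterable<SnapshotContext> snapshotsSinceMvRebuild) {
        return StreamSupport.stream(snapshotsSinceMvRebuild.spliterator(), false)
            .mapToDouble(SnapshotContext::getAddedRowCount)
            .sum();
      }
    }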
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_7.q.out
index 000f07b5a1d..80d4fc63f0a 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_7.q.out
@@ -102,7 +102,7 @@ POSTHOOK: Output: default@mat1
 CBO PLAN:
 HiveProject($f0=[$3], $f1=[CASE(IS NULL($1), $4, IS NULL($4), $1, +($4, $1))])
   HiveFilter(condition=[OR($2, IS NULL($2))])
-    HiveJoin(condition=[IS NOT DISTINCT FROM($0, $3)], joinType=[right], algorithm=[none], cost=[not available])
+    HiveJoin(condition=[IS NOT DISTINCT FROM($0, $3)], joinType=[right], algorithm=[BucketJoin], cost=[not available])
       HiveProject(a=[$0], _c1=[$1], $f2=[true])
         HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
       HiveProject(a=[$0], $f1=[$1])
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_nulls.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_nulls.q.out
index 53978961bfa..93a9cb52e2d 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_nulls.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_nulls.q.out
@@ -118,7 +118,7 @@ Explain
 CBO PLAN:
 HiveProject($f0=[$6], $f1=[$7], $f2=[CASE(IS NULL($2), $8, IS NULL($8), $2, +($8, $2))], $f3=[CASE(IS NULL($3), $9, IS NULL($9), $3, CASE(<($9, $3), $9, $3))], $f4=[CASE(IS NULL($4), $10, IS NULL($10), $4, CASE(>($10, $4), $10, $4))])
   HiveFilter(condition=[OR($5, IS NULL($5))])
-    HiveJoin(condition=[AND(IS NOT DISTINCT FROM($0, $6), IS NOT DISTINCT FROM($1, $7))], joinType=[right], algorithm=[none], cost=[not available])
+    HiveJoin(condition=[AND(IS NOT DISTINCT FROM($0, $6), IS NOT DISTINCT FROM($1, $7))], joinType=[right], algorithm=[BucketJoin], cost=[not available])
       HiveProject(a=[$0], b=[$1], _c2=[$2], _c3=[$3], _c4=[$4], $f5=[true])
         HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
       HiveProject(a=[$0], b=[$1], $f2=[$2], $f3=[$3], $f4=[$4])
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/SnapshotContext.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/SnapshotContext.java
index 7d2080fb917..59ff05e1b8a 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/SnapshotContext.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/SnapshotContext.java
@@ -27,7 +27,18 @@ import java.util.Objects;
  * materialized view on Iceberg tables.
  */
 public class SnapshotContext {
+  public enum WriteOperationType {
+    APPEND,
+    REPLACE,
+    OVERWRITE,
+    DELETE,
+    UNKNOWN
+  }
+
   private long snapshotId;
+  private WriteOperationType operation;
+  private long addedRowCount;
+  private long deletedRowCount;
 
   /**
    * Constructor for json serialization
@@ -37,6 +48,16 @@ public class SnapshotContext {
 
   public SnapshotContext(long snapshotId) {
     this.snapshotId = snapshotId;
+    this.operation = null;
+    this.addedRowCount = 0;
+    this.deletedRowCount = 0;
+  }
+
+  public SnapshotContext(long snapshotId, WriteOperationType operation, long addedRowCount, long deletedRowCount) {
+    this.snapshotId = snapshotId;
+    this.operation = operation;
+    this.addedRowCount = addedRowCount;
+    this.deletedRowCount = deletedRowCount;
   }
 
   /**
@@ -64,10 +85,25 @@ public class SnapshotContext {
     return Objects.hash(snapshotId);
   }
 
+  public WriteOperationType getOperation() {
+    return operation;
+  }
+
+  public long getAddedRowCount() {
+    return addedRowCount;
+  }
+
+  public long getDeletedRowCount() {
+    return deletedRowCount;
+  }
+
   @Override
   public String toString() {
     return "SnapshotContext{" +
         "snapshotId=" + snapshotId +
+        ", operation=" + operation +
+        ", addedRowCount=" + addedRowCount +
+        ", deletedRowCount=" + deletedRowCount +
         '}';
   }
 }
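For reference, a short example of how the enriched SnapshotContext is meant to be consumed: the four-argument constructor carries the write operation and the row deltas, and callers such as the append-only check in Hive.java compare getOperation() against WriteOperationType.APPEND. The snapshot ids and counts below are made up for illustration:

    import org.apache.hadoop.hive.common.type.SnapshotContext;
    import org.apache.hadoop.hive.common.type.SnapshotContext.WriteOperationType;

    public class SnapshotContextExample {
      public static void main(String[] args) {
        SnapshotContext append = new SnapshotContext(1L, WriteOperationType.APPEND, 500L, 0L);
        SnapshotContext overwrite = new SnapshotContext(2L, WriteOperationType.OVERWRITE, 100L, 40L);

        boolean appendsOnly = WriteOperationType.APPEND.equals(append.getOperation())
            && WriteOperationType.APPEND.equals(overwrite.getOperation());
        System.out.println(appendsOnly);  // false: the OVERWRITE snapshot blocks incremental rebuild
        System.out.println(append);       // toString() now includes the operation and the row counts
      }
    }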
