This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 61b9683524f HIVE-26922: Deadlock when rebuilding Materialized view stored by Iceberg (Krisztian Kasa, reviewed by Aman Sinha, Stamatis Zampetakis, Steve Carlin) 61b9683524f is described below commit 61b9683524f5d865e973b296aa3ab074b08c1be9 Author: Krisztian Kasa <kasakri...@gmail.com> AuthorDate: Thu Jan 19 06:08:33 2023 +0100 HIVE-26922: Deadlock when rebuilding Materialized view stored by Iceberg (Krisztian Kasa, reviewed by Aman Sinha, Stamatis Zampetakis, Steve Carlin) --- .../src/test/queries/positive/mv_iceberg_orc2.q | 11 +++- .../src/test/queries/positive/mv_iceberg_orc3.q | 19 +++++++ .../test/results/positive/mv_iceberg_orc2.q.out | 42 ++++++++++++++ .../test/results/positive/mv_iceberg_orc3.q.out | 65 ++++++++++++++++++++++ .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 30 ++++++---- .../hive/ql/metadata/StorageHandlerMock.java | 5 +- 6 files changed, 156 insertions(+), 16 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc2.q b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc2.q index 3241fa23956..0e06d816868 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc2.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc2.q @@ -1,4 +1,4 @@ --- MV data is stored by iceberg +-- MV data is stored by iceberg v1 -- SORT_QUERY_RESULTS set hive.explain.user=false; @@ -22,3 +22,12 @@ select * from mat1; explain cbo select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52; + +insert into tbl_ice values (10, 'ten', 60); + +explain cbo +alter materialized view mat1 rebuild; + +alter materialized view mat1 rebuild; + +select * from mat1; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc3.q b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc3.q new file mode 100644 index 00000000000..6d7b682f316 --- /dev/null +++ b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc3.q @@ -0,0 +1,19 @@ +-- MV data is stored by iceberg v2 + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +drop materialized view if exists mat1; +drop table if exists tbl_ice; + +create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='2'); +insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54); + +create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as +select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52; + +delete from tbl_ice where a = 4; + +alter materialized view mat1 rebuild; + +select * from mat1; diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc2.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc2.q.out index d01f254810b..0a37fdc1b70 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc2.q.out @@ -185,3 +185,45 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1]) +PREHOOK: query: insert into tbl_ice values (10, 'ten', 60) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl_ice +POSTHOOK: query: insert into tbl_ice values (10, 'ten', 60) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl_ice +PREHOOK: query: explain cbo +alter materialized view mat1 rebuild +PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD +PREHOOK: Input: default@tbl_ice +PREHOOK: Output: default@mat1 +POSTHOOK: query: explain cbo +alter materialized view mat1 rebuild +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD +POSTHOOK: Input: default@tbl_ice +POSTHOOK: Output: default@mat1 +CBO PLAN: +HiveProject(b=[$1], c=[$2]) + HiveFilter(condition=[>($2, 52)]) + HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice]) + +PREHOOK: query: alter materialized view mat1 rebuild +PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD +PREHOOK: Input: default@tbl_ice +PREHOOK: Output: default@mat1 +POSTHOOK: query: alter materialized view mat1 rebuild +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD +POSTHOOK: Input: default@tbl_ice +POSTHOOK: Output: default@mat1 +PREHOOK: query: select * from mat1 +PREHOOK: type: QUERY +PREHOOK: Input: default@mat1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from mat1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mat1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +five 54 +four 53 +ten 60 diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc3.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc3.q.out new file mode 100644 index 00000000000..76906b899e5 --- /dev/null +++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc3.q.out @@ -0,0 +1,65 @@ +PREHOOK: query: drop materialized view if exists mat1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: query: drop materialized view if exists mat1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: query: drop table if exists tbl_ice +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tbl_ice +POSTHOOK: type: DROPTABLE +PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='2') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl_ice +POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='2') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_ice +PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl_ice +POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl_ice +PREHOOK: query: create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as +select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@tbl_ice +PREHOOK: Output: database:default +PREHOOK: Output: default@mat1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as +select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@tbl_ice +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mat1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: Lineage: mat1.b SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: mat1.c SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ] +PREHOOK: query: delete from tbl_ice where a = 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_ice +PREHOOK: Output: default@tbl_ice +POSTHOOK: query: delete from tbl_ice where a = 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_ice +POSTHOOK: Output: default@tbl_ice +PREHOOK: query: alter materialized view mat1 rebuild +PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD +PREHOOK: Input: default@tbl_ice +PREHOOK: Output: default@mat1 +POSTHOOK: query: alter materialized view mat1 rebuild +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD +POSTHOOK: Input: default@tbl_ice +POSTHOOK: Output: default@mat1 +PREHOOK: query: select * from mat1 +PREHOOK: type: QUERY +PREHOOK: Input: default@mat1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from mat1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mat1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +five 54 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index c7ba4a7d196..03d71982323 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -3029,13 +3029,16 @@ public class AcidUtils { case INSERT_OVERWRITE: assert t != null; if (AcidUtils.isTransactionalTable(t)) { - if (conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK) && !sharedWrite - && !isLocklessReadsEnabled) { + if (conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK) && !sharedWrite + && !isLocklessReadsEnabled) { compBuilder.setExclusive(); } else { compBuilder.setExclWrite(); } compBuilder.setOperationType(DataOperationType.UPDATE); + } else if (MetaStoreUtils.isNonNativeTable(t.getTTable())) { + compBuilder.setLock(getLockTypeFromStorageHandler(output, t)); + compBuilder.setOperationType(DataOperationType.UPDATE); } else { compBuilder.setExclusive(); compBuilder.setOperationType(DataOperationType.NO_TXN); @@ -3063,15 +3066,7 @@ public class AcidUtils { break; } } else if (MetaStoreUtils.isNonNativeTable(t.getTTable())) { - final HiveStorageHandler storageHandler = Preconditions.checkNotNull(t.getStorageHandler(), - "Thought all the non native tables have an instance of storage handler"); - LockType lockType = storageHandler.getLockType(output); - if (null == LockType.findByValue(lockType.getValue())) { - throw new IllegalArgumentException(String - .format("Lock type [%s] for Database.Table [%s.%s] is unknown", lockType, t.getDbName(), - t.getTableName())); - } - compBuilder.setLock(lockType); + compBuilder.setLock(getLockTypeFromStorageHandler(output, t)); } else { if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) { compBuilder.setExclusive(); @@ -3122,7 +3117,18 @@ public class AcidUtils { } return lockComponents; } - + + private static LockType getLockTypeFromStorageHandler(WriteEntity output, Table t) { + final HiveStorageHandler storageHandler = Preconditions.checkNotNull(t.getStorageHandler(), + "Non-native tables must have an instance of storage handler."); + LockType lockType = storageHandler.getLockType(output); + if (null == lockType) { + throw new IllegalArgumentException( + String.format("Lock type for Database.Table [%s.%s] is null", t.getDbName(), t.getTableName())); + } + return lockType; + } + public static boolean isExclusiveCTASEnabled(Configuration conf) { return HiveConf.getBoolVar(conf, ConfVars.TXN_CTAS_X_LOCK); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/StorageHandlerMock.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/StorageHandlerMock.java index 4de7d3e18cc..1548f650e17 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/StorageHandlerMock.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/StorageHandlerMock.java @@ -65,12 +65,11 @@ public class StorageHandlerMock extends DefaultStorageHandler { }; } - @Override public LockType getLockType(WriteEntity writeEntity - ) { + @Override public LockType getLockType(WriteEntity writeEntity) { if (writeEntity.getWriteType().equals(WriteEntity.WriteType.INSERT)) { return LockType.SHARED_READ; } - return LockType.SHARED_WRITE; + return super.getLockType(writeEntity); } @Override public Class<? extends OutputFormat> getOutputFormatClass() {