This is an automated email from the ASF dual-hosted git repository. tchoi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new ac1cfe2294d HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor) ac1cfe2294d is described below commit ac1cfe2294d1b7b8c02b60899d69868920ede0f5 Author: Teddy Choi <tc...@apache.org> AuthorDate: Wed Jan 4 13:35:57 2023 +0900 HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor) --- .../java/org/apache/hadoop/hive/ql/ErrorMsg.java | 1 + .../hive/ql/hooks/EnforceReadOnlyDatabaseHook.java | 196 +++++++++++++++++++++ .../test/queries/clientnegative/read_only_delete.q | 23 +++ .../test/queries/clientnegative/read_only_insert.q | 23 +++ .../test/queries/clientpositive/read_only_hook.q | 27 +++ .../results/clientnegative/read_only_delete.q.out | 34 ++++ .../results/clientnegative/read_only_insert.q.out | 35 ++++ .../clientpositive/llap/read_only_hook.q.out | 71 ++++++++ 8 files changed, 410 insertions(+) diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 1a3ad38acef..5e4fbf8e642 100644 --- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -485,6 +485,7 @@ public enum ErrorMsg { CTLF_UNSUPPORTED_FORMAT(10434, "CREATE TABLE LIKE FILE is not supported by the ''{0}'' file format", true), NON_NATIVE_ACID_UPDATE(10435, "Update and Merge into non-native ACID table is only supported when " + HiveConf.ConfVars.SPLIT_UPDATE.varname + " is true."), + READ_ONLY_DATABASE(10436, "Database {0} is read-only", true), //========================== 20000 range starts here ========================// diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java new file mode 100644 index 00000000000..c343fc7b14f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.hooks; + +import com.google.common.collect.Sets; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.ddl.DDLDesc; +import org.apache.hadoop.hive.ql.ddl.DDLWork; +import org.apache.hadoop.hive.ql.ddl.database.alter.poperties.AlterDatabaseSetPropertiesDesc; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * EnforceReadOnlyDatabaseHook is a hook that disallows write operations on read-only databases. + * It's enforced when "hive.exec.pre.hooks" has "org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook" and + * a database has 'readonly'='true' property. + */ +public class EnforceReadOnlyDatabaseHook implements ExecuteWithHookContext { + public static final String READONLY = "readonly"; + private static final Logger LOG = LoggerFactory.getLogger(EnforceReadOnlyDatabaseHook.class); + + @Override + public void run(HookContext hookContext) throws Exception { + assert(hookContext.getHookType() == HookContext.HookType.PRE_EXEC_HOOK); + final QueryState queryState = hookContext.getQueryState(); + final HiveOperation hiveOperation = queryState.getHiveOperation(); + + // Allow read-only type operations, excluding query. + // A query can be EXPLAIN, SELECT, INSERT, UPDATE, or DELETE. + if (isReadOnlyOperation(hiveOperation)) { + return; + } + + // Allow EXPLAIN or SELECT query operations, disallow INSERT, UPDATE, and DELETE. + if (isExplainOrSelectQuery(hookContext)) { + return; + } + + // Allow ALTERDATABASE operations to make writable. + // It's a special allowed case to get out from readonly mode. + if (isAlterDbWritable(hookContext)) { + return; + } + // The second exception is to load a dumped database. + if (hiveOperation == HiveOperation.REPLLOAD) { + return; + } + + // Now the remaining operation is a write operation, as a read operation is already allowed. + + // Disallow write operations on a read-only database. + checkReadOnlyDbAsOutput(hookContext); + + // Allow write operations on writable databases. + } + + private static void checkReadOnlyDbAsOutput(HookContext hookContext) throws HiveException { + LOG.debug(hookContext.getQueryPlan().getOutputs().toString()); + // If it has a data/metadata change operation on a read-only database, throw an exception. + final Set<WriteEntity> outputs = hookContext.getQueryPlan().getOutputs(); + for (WriteEntity output: outputs) { + // Get the database. + final Database database; + final Hive hive = SessionState.get().getHiveDb(); + if (output.getDatabase() == null) { + // For a table, get its database. + database = hive.getDatabase(output.getTable().getDbName()); + } else { + // For a database, allow new one, since it's not read-only yet. + database = output.getDatabase(); + final boolean exists = hive.databaseExists(database.getName()); + LOG.debug("database exists: {}", exists); + if (!exists) { + continue; + } + } + + // Read the database property. + final Map<String, String> parameters = database.getParameters(); + LOG.debug("database name: " + database.getName() + " param: " + database.getParameters()); + + // If it's a read-only database, disallow it. + if (parameters == null) { + continue; + } + if (parameters.isEmpty()) { + continue; + } + if (!parameters.containsKey(READONLY)) { + continue; + } + if ("true".equalsIgnoreCase(parameters.get(READONLY))) { + throw new SemanticException(ErrorMsg.READ_ONLY_DATABASE, database.getName()); + } + } + } + + private static boolean isExplainOrSelectQuery(HookContext hookContext) { + final QueryState queryState = hookContext.getQueryState(); + final HiveOperation hiveOperation = queryState.getHiveOperation(); + + // Allow EXPLAIN, SELECT queries, not INSERT, UPDATE, DELETE queries. + if (hiveOperation == HiveOperation.QUERY) { + if (hookContext.getQueryPlan().isExplain()) { + return true; + } + final String upper = queryState.getQueryString().trim().toUpperCase(); + if (upper.startsWith("SELECT")) { + return true; + } + } + return false; + } + + /** + * Checks whether it's an ALTERDATABASE statement without {"readonly":"true"} property. + * 1. The properties are removed. + * 2. "readonly" is removed from the properties. + * 3. "readonly" becomes non-true value. + */ + private static boolean isAlterDbWritable(HookContext hookContext) { + final HiveOperation hiveOperation = hookContext.getQueryState().getHiveOperation(); + if (hiveOperation == HiveOperation.ALTERDATABASE) { + final List<Task<?>> rootTasks = hookContext.getQueryPlan().getRootTasks(); + if (rootTasks.size() == 1) { + final Serializable rootWork = rootTasks.get(0).getWork(); + if (rootWork instanceof DDLWork) { + final DDLWork ddlWork = (DDLWork) rootWork; + final DDLDesc ddlDesc = ddlWork.getDDLDesc(); + if (ddlDesc instanceof AlterDatabaseSetPropertiesDesc) { + final AlterDatabaseSetPropertiesDesc alterDatabaseDesc = (AlterDatabaseSetPropertiesDesc) ddlDesc; + final Map<String, String> properties = alterDatabaseDesc.getDatabaseProperties(); + if (properties == null) { + // The properties are removed. + return true; + } + if (!properties.containsKey(READONLY)) { + // "readonly" is removed from the properties. + return true; + } + if (!"true".equalsIgnoreCase(properties.get(READONLY))) { + // "readonly" becomes non-true value. + return true; + } + } + } + } + } + return false; + } + + private static boolean isReadOnlyOperation(HiveOperation hiveOperation) { + switch (hiveOperation) { + case EXPLAIN: + case SWITCHDATABASE: + case REPLDUMP: + case REPLSTATUS: + case EXPORT: + case KILL_QUERY: + return true; + } + if (Sets.newHashSet("SHOW", "DESC").stream().anyMatch(hiveOperation.name()::startsWith)) { + return true; + } + return false; + } +} diff --git a/ql/src/test/queries/clientnegative/read_only_delete.q b/ql/src/test/queries/clientnegative/read_only_delete.q new file mode 100644 index 00000000000..d91f0c0febf --- /dev/null +++ b/ql/src/test/queries/clientnegative/read_only_delete.q @@ -0,0 +1,23 @@ +set hive.vectorized.execution.enabled=false; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook; + +CREATE DATABASE writable; +CREATE TABLE writable.src + (cint INT) + CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC + TBLPROPERTIES ('transactional'='true'); +ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false'); +DELETE FROM writable.src WHERE cint < 0; + +CREATE DATABASE readonly; +CREATE TABLE readonly.src + (cint INT) + CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC + TBLPROPERTIES ('transactional'='true'); +ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true'); +DELETE FROM readonly.src WHERE cint < 0; diff --git a/ql/src/test/queries/clientnegative/read_only_insert.q b/ql/src/test/queries/clientnegative/read_only_insert.q new file mode 100644 index 00000000000..e3e28943b84 --- /dev/null +++ b/ql/src/test/queries/clientnegative/read_only_insert.q @@ -0,0 +1,23 @@ +set hive.vectorized.execution.enabled=false; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook; + +CREATE DATABASE writable; +CREATE TABLE writable.src + (cint INT) + CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC + TBLPROPERTIES ('transactional'='true'); +ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false'); +INSERT INTO writable.src VALUES(1); + +CREATE DATABASE readonly; +CREATE TABLE readonly.src + (cint INT) + CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC + TBLPROPERTIES ('transactional'='true'); +ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true'); +INSERT INTO readonly.src VALUES(1); diff --git a/ql/src/test/queries/clientpositive/read_only_hook.q b/ql/src/test/queries/clientpositive/read_only_hook.q new file mode 100644 index 00000000000..007af957738 --- /dev/null +++ b/ql/src/test/queries/clientpositive/read_only_hook.q @@ -0,0 +1,27 @@ +--! qt:dataset:src +set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook; + +ALTER DATABASE default SET DBPROPERTIES('readonly' = 'true'); + +-- SET +set hive.exec.reducers.max = 1; + +-- USE +use default; + +-- SHOW +SHOW DATABASES; +SHOW CREATE DATABASE default; +SHOW CREATE TABLE src; + +-- DESC +DESC DATABASE default; + +-- DESCRIBE +DESCRIBE src; + +-- EXPLAIN +EXPLAIN SELECT * FROM src LIMIT 1; + +-- SELECT +SELECT * FROM src LIMIT 1; diff --git a/ql/src/test/results/clientnegative/read_only_delete.q.out b/ql/src/test/results/clientnegative/read_only_delete.q.out new file mode 100644 index 00000000000..59e78080e31 --- /dev/null +++ b/ql/src/test/results/clientnegative/read_only_delete.q.out @@ -0,0 +1,34 @@ +POSTHOOK: query: CREATE DATABASE writable +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:writable +POSTHOOK: query: CREATE TABLE writable.src + (cint INT) + CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC + TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:writable +POSTHOOK: Output: writable@src +POSTHOOK: query: ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false') +POSTHOOK: type: ALTERDATABASE +POSTHOOK: Output: database:writable +POSTHOOK: query: DELETE FROM writable.src WHERE cint < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: writable@src +POSTHOOK: Output: writable@src +POSTHOOK: query: CREATE DATABASE readonly +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:readonly +POSTHOOK: query: CREATE TABLE readonly.src + (cint INT) + CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC + TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:readonly +POSTHOOK: Output: readonly@src +POSTHOOK: query: ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true') +POSTHOOK: type: ALTERDATABASE +POSTHOOK: Output: database:readonly +FAILED: Hive Internal Error: org.apache.hadoop.hive.ql.parse.SemanticException(Database readonly is read-only) +org.apache.hadoop.hive.ql.parse.SemanticException: Database readonly is read-only +#### A masked pattern was here #### + diff --git a/ql/src/test/results/clientnegative/read_only_insert.q.out b/ql/src/test/results/clientnegative/read_only_insert.q.out new file mode 100644 index 00000000000..f78f18a5055 --- /dev/null +++ b/ql/src/test/results/clientnegative/read_only_insert.q.out @@ -0,0 +1,35 @@ +POSTHOOK: query: CREATE DATABASE writable +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:writable +POSTHOOK: query: CREATE TABLE writable.src + (cint INT) + CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC + TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:writable +POSTHOOK: Output: writable@src +POSTHOOK: query: ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false') +POSTHOOK: type: ALTERDATABASE +POSTHOOK: Output: database:writable +POSTHOOK: query: INSERT INTO writable.src VALUES(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: writable@src +POSTHOOK: Lineage: src.cint SCRIPT [] +POSTHOOK: query: CREATE DATABASE readonly +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:readonly +POSTHOOK: query: CREATE TABLE readonly.src + (cint INT) + CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC + TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:readonly +POSTHOOK: Output: readonly@src +POSTHOOK: query: ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true') +POSTHOOK: type: ALTERDATABASE +POSTHOOK: Output: database:readonly +FAILED: Hive Internal Error: org.apache.hadoop.hive.ql.parse.SemanticException(Database readonly is read-only) +org.apache.hadoop.hive.ql.parse.SemanticException: Database readonly is read-only +#### A masked pattern was here #### + diff --git a/ql/src/test/results/clientpositive/llap/read_only_hook.q.out b/ql/src/test/results/clientpositive/llap/read_only_hook.q.out new file mode 100644 index 00000000000..d7a1e983d4c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/read_only_hook.q.out @@ -0,0 +1,71 @@ +POSTHOOK: query: ALTER DATABASE default SET DBPROPERTIES('readonly' = 'true') +POSTHOOK: type: ALTERDATABASE +POSTHOOK: Output: database:default +POSTHOOK: query: use default +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:default +POSTHOOK: query: SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +POSTHOOK: query: SHOW CREATE DATABASE default +POSTHOOK: type: SHOW_CREATEDATABASE +POSTHOOK: Input: database:default +CREATE DATABASE `default` +COMMENT + 'Default Hive database' +LOCATION +#### A masked pattern was here #### +WITH DBPROPERTIES ( + 'readonly'='true') +POSTHOOK: query: SHOW CREATE TABLE src +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@src +CREATE TABLE `src`( + `key` string COMMENT 'default', + `value` string COMMENT 'default') +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION +#### A masked pattern was here #### +TBLPROPERTIES ( + 'bucketing_version'='2', +#### A masked pattern was here #### +POSTHOOK: query: DESC DATABASE default +POSTHOOK: type: DESCDATABASE +POSTHOOK: Input: database:default +default Default Hive database location/in/test public ROLE +POSTHOOK: query: DESCRIBE src +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src +key string default +value string default +POSTHOOK: query: EXPLAIN SELECT * FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + TableScan + alias: src + Limit + Number of rows: 1 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + ListSink + +POSTHOOK: query: SELECT * FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238