This is an automated email from the ASF dual-hosted git repository. dengzh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new f8f6b167ed3 HIVE-26799: Make authorizations on custom UDFs involved in tables/view configurable (Sai Hemanth Gantasala, reviewed by John Sherman, Zhihua Deng) f8f6b167ed3 is described below commit f8f6b167ed3623f380d6a26903c54a8a98ddd9aa Author: Sai Hemanth Gantasala <68923650+saihemanth-cloud...@users.noreply.github.com> AuthorDate: Mon Dec 12 17:55:17 2022 -0800 HIVE-26799: Make authorizations on custom UDFs involved in tables/view configurable (Sai Hemanth Gantasala, reviewed by John Sherman, Zhihua Deng) Closes #3821 --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hadoop/hive/ql/parse/BaseSemanticAnalyzer.java | 14 ++ .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 25 +++ .../authorization/command/CommandAuthorizerV2.java | 29 ++-- .../authorization_functions_in_views.q | 56 +++++++ .../llap/authorization_functions_in_views.q.out | 181 +++++++++++++++++++++ 6 files changed, 298 insertions(+), 10 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 44452b970e5..b3b715a81ae 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3599,6 +3599,9 @@ public class HiveConf extends Configuration { HIVE_AUTHORIZATION_TABLES_ON_STORAGEHANDLERS("hive.security.authorization.tables.on.storagehandlers", true, "Enables authorization on tables with custom storage handlers as implemented by HIVE-24705. " + "Default setting is true. Useful for turning the feature off if the corresponding ranger patch is missing."), + HIVE_AUTHORIZATION_FUNCTIONS_IN_VIEW("hive.security.authorization.functions.in.view", true, + "Enable authorization on functions/udfs used within a TABLE/VIEW during a select query. 
Default " + + "setting is true. When set to false, the udf used in the table/view will not be authorized during the select query"), // if this is not set default value is set during config initialization // Default value can't be set in this constructor as it would refer names in other ConfVars diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 530b41a7056..b8e1e49386c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -142,6 +142,9 @@ public abstract class BaseSemanticAnalyzer { protected QueryProperties queryProperties; ParseContext pCtx = null; + //user defined functions in query + protected Set<String> userSuppliedFunctions; + /** * A set of FileSinkOperators being written to in an ACID compliant way. We need to remember * them here because when we build them we don't yet know the write id. We need to go @@ -286,6 +289,7 @@ public abstract class BaseSemanticAnalyzer { inputs = new LinkedHashSet<ReadEntity>(); outputs = new LinkedHashSet<WriteEntity>(); txnManager = queryState.getTxnManager(); + userSuppliedFunctions = new HashSet<>(); } catch (Exception e) { throw new SemanticException(e); } @@ -1443,6 +1447,16 @@ public abstract class BaseSemanticAnalyzer { this.updateColumnAccessInfo = updateColumnAccessInfo; } + /** + * Gets the user supplied functions. + * Note 1: This set only accumulates UDFs explicitly mentioned in the query + * Note 2: This set will not include UDFs defined with views/tables + * @return Set of String with names of UDFs. 
+ */ + public Set<String> getUserSuppliedFunctions() { + return userSuppliedFunctions; + } + /** * Checks if given specification is proper specification for prefix of * partition cols, for table partitioned by ds, hr, min valid ones are diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 75c62ca6008..57702036cdb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -136,6 +136,7 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.FunctionUtils; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; @@ -12539,6 +12540,26 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } + void gatherUserSuppliedFunctions(ASTNode ast) throws SemanticException { + int tokenType = ast.getToken().getType(); + if (tokenType == HiveParser.TOK_FUNCTION || + tokenType == HiveParser.TOK_FUNCTIONDI || + tokenType == HiveParser.TOK_FUNCTIONSTAR) { + if (ast.getChild(0).getType() == HiveParser.Identifier) { + try { + String functionName = unescapeIdentifier(ast.getChild(0).getText()).toLowerCase(); + String[] qualifiedFunctionName = FunctionUtils.getQualifiedFunctionNameParts(functionName); + this.userSuppliedFunctions.add(qualifiedFunctionName[0]+"."+qualifiedFunctionName[1]); + } catch (HiveException ex) { + throw new SemanticException(ex.getMessage(), ex); + } + } + } + for (int i = 0; i < ast.getChildCount();i++) { + gatherUserSuppliedFunctions((ASTNode) ast.getChild(i)); + } + } + boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws 
SemanticException { ASTNode child = ast; this.ast = ast; @@ -12601,6 +12622,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // the basic idea is similar to unparseTranslator. tableMask = new TableMask(this, conf, ctx.isSkipTableMasking()); + // Gather UDFs referenced in query before VIEW expansion. This is used to + // determine if authorization checks need to occur on the UDFs. + gatherUserSuppliedFunctions(child); + // 4. continue analyzing from the child ASTNode. Phase1Ctx ctx_1 = initPhase1Ctx(); if (!doPhase1(child, qb, ctx_1, plannerCtx)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java index 13281980cc1..27fbe94f38d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java @@ -72,8 +72,8 @@ final class CommandAuthorizerV2 { List<WriteEntity> outputList = new ArrayList<WriteEntity>(outputs); addPermanentFunctionEntities(ss, inputList); - List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputList, selectTab2Cols, hiveOpType); - List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputList, updateTab2Cols, hiveOpType); + List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputList, selectTab2Cols, hiveOpType, sem); + List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputList, updateTab2Cols, hiveOpType, sem); HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder(); authzContextBuilder.setUserIpAddress(ss.getUserIpAddress()); @@ -98,13 +98,13 @@ final class CommandAuthorizerV2 { } private static List<HivePrivilegeObject> getHivePrivObjects(List<? 
extends Entity> privObjects, - Map<String, List<String>> tableName2Cols, HiveOperationType hiveOpType) throws HiveException { + Map<String, List<String>> tableName2Cols, HiveOperationType hiveOpType, BaseSemanticAnalyzer sem) throws HiveException { List<HivePrivilegeObject> hivePrivobjs = new ArrayList<HivePrivilegeObject>(); - if (privObjects == null){ + if (privObjects == null) { return hivePrivobjs; } - for (Entity privObject : privObjects){ + for (Entity privObject : privObjects) { if (privObject.isDummy()) { //do not authorize dummy readEntity or writeEntity continue; @@ -114,19 +114,19 @@ final class CommandAuthorizerV2 { // it's not inside a deferred authorized view. ReadEntity reTable = (ReadEntity)privObject; Boolean isDeferred = false; - if( reTable.getParents() != null && reTable.getParents().size() > 0){ - for( ReadEntity re: reTable.getParents()){ + if ( reTable.getParents() != null && reTable.getParents().size() > 0) { + for ( ReadEntity re: reTable.getParents()){ if (re.getTyp() == Type.TABLE && re.getTable() != null ) { Table t = re.getTable(); - if(!isDeferredAuthView(t)){ + if (!isDeferredAuthView(t)) { continue; - }else{ + } else { isDeferred = true; } } } } - if(!isDeferred){ + if (!isDeferred) { continue; } } @@ -139,6 +139,15 @@ final class CommandAuthorizerV2 { continue; } + if (privObject.getTyp() == Type.FUNCTION && !HiveConf.getBoolVar(SessionState.get().getConf(), + HiveConf.ConfVars.HIVE_AUTHORIZATION_FUNCTIONS_IN_VIEW) && hiveOpType == HiveOperationType.QUERY) { + String[] qualifiedFunctionName = new String[]{privObject.getDatabase() != null ? + privObject.getDatabase().getName() : null, privObject.getFunctionName()}; + if (!sem.getUserSuppliedFunctions().contains(qualifiedFunctionName[0] + "." 
+ qualifiedFunctionName[1])) { + continue; + } + } + addHivePrivObject(privObject, tableName2Cols, hivePrivobjs, hiveOpType); } return hivePrivobjs; diff --git a/ql/src/test/queries/clientpositive/authorization_functions_in_views.q b/ql/src/test/queries/clientpositive/authorization_functions_in_views.q new file mode 100644 index 00000000000..7bdf4a76baa --- /dev/null +++ b/ql/src/test/queries/clientpositive/authorization_functions_in_views.q @@ -0,0 +1,56 @@ +--! qt:authorizer +set user.name=hive_admin_user; + +-- admin required for create function +set role ADMIN; + +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; +set hive.security.authorization.enabled=true; + +drop function if exists udf_upper; + +drop function if exists udf_lower; + +drop table if exists base_table; + +drop view if exists view_using_udf; + +drop database if exists test; + +create function udf_upper as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper'; + +create table base_table(city string); + +create view view_using_udf as select udf_upper(city) as upper_city from base_table; + +select * from view_using_udf; + +create function udf_lower as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower'; + +select udf_lower(upper_city) from view_using_udf; + +create database test; + +create function test.UDF_upper as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper'; + +select test.UDF_Upper(upper_city) from view_using_udf; + +set hive.security.authorization.functions.in.view=false; + +select * from view_using_udf; + +select udf_lower(upper_city) from view_using_udf; + +select test.UDF_Upper(upper_city) from view_using_udf; + +drop function test.UDF_Upper; + +drop database test; + +drop function udf_lower; + +drop function udf_upper; + +drop view view_using_udf; + +drop table base_table; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/authorization_functions_in_views.q.out 
b/ql/src/test/results/clientpositive/llap/authorization_functions_in_views.q.out new file mode 100644 index 00000000000..4094871cab2 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/authorization_functions_in_views.q.out @@ -0,0 +1,181 @@ +PREHOOK: query: set role ADMIN +PREHOOK: type: SHOW_ROLES +POSTHOOK: query: set role ADMIN +POSTHOOK: type: SHOW_ROLES +PREHOOK: query: drop function if exists udf_upper +PREHOOK: type: DROPFUNCTION +POSTHOOK: query: drop function if exists udf_upper +POSTHOOK: type: DROPFUNCTION +PREHOOK: query: drop function if exists udf_lower +PREHOOK: type: DROPFUNCTION +POSTHOOK: query: drop function if exists udf_lower +POSTHOOK: type: DROPFUNCTION +PREHOOK: query: drop table if exists base_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists base_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop view if exists view_using_udf +PREHOOK: type: DROPVIEW +POSTHOOK: query: drop view if exists view_using_udf +POSTHOOK: type: DROPVIEW +PREHOOK: query: drop database if exists test +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists test +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create function udf_upper as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: database:default +PREHOOK: Output: default.udf_upper +POSTHOOK: query: create function udf_upper as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: database:default +POSTHOOK: Output: default.udf_upper +PREHOOK: query: create table base_table(city string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@base_table +POSTHOOK: query: create table base_table(city string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@base_table +PREHOOK: query: create view view_using_udf as select udf_upper(city) as upper_city from base_table +PREHOOK: type: 
CREATEVIEW +PREHOOK: Input: default@base_table +PREHOOK: Output: database:default +PREHOOK: Output: default@view_using_udf +POSTHOOK: query: create view view_using_udf as select udf_upper(city) as upper_city from base_table +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@base_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@view_using_udf +POSTHOOK: Lineage: view_using_udf.upper_city EXPRESSION [(base_table)base_table.FieldSchema(name:city, type:string, comment:null), ] +PREHOOK: query: select * from view_using_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@base_table +PREHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +POSTHOOK: query: select * from view_using_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@base_table +POSTHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +PREHOOK: query: create function udf_lower as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: database:default +PREHOOK: Output: default.udf_lower +POSTHOOK: query: create function udf_lower as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: database:default +POSTHOOK: Output: default.udf_lower +PREHOOK: query: select udf_lower(upper_city) from view_using_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@base_table +PREHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +POSTHOOK: query: select udf_lower(upper_city) from view_using_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@base_table +POSTHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +PREHOOK: query: create database test +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:test +POSTHOOK: query: create database test +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:test +PREHOOK: query: create function test.UDF_upper as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper' 
+PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: database:test +PREHOOK: Output: test.udf_upper +POSTHOOK: query: create function test.UDF_upper as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: database:test +POSTHOOK: Output: test.udf_upper +PREHOOK: query: select test.UDF_Upper(upper_city) from view_using_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@base_table +PREHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +POSTHOOK: query: select test.UDF_Upper(upper_city) from view_using_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@base_table +POSTHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +PREHOOK: query: select * from view_using_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@base_table +PREHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +POSTHOOK: query: select * from view_using_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@base_table +POSTHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +PREHOOK: query: select udf_lower(upper_city) from view_using_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@base_table +PREHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +POSTHOOK: query: select udf_lower(upper_city) from view_using_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@base_table +POSTHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +PREHOOK: query: select test.UDF_Upper(upper_city) from view_using_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@base_table +PREHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +POSTHOOK: query: select test.UDF_Upper(upper_city) from view_using_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@base_table +POSTHOOK: Input: default@view_using_udf +#### A masked pattern was here #### +PREHOOK: query: drop function test.UDF_Upper +PREHOOK: type: DROPFUNCTION +PREHOOK: Output: 
database:test +PREHOOK: Output: test.UDF_Upper +POSTHOOK: query: drop function test.UDF_Upper +POSTHOOK: type: DROPFUNCTION +POSTHOOK: Output: database:test +POSTHOOK: Output: test.UDF_Upper +PREHOOK: query: drop database test +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:test +PREHOOK: Output: database:test +POSTHOOK: query: drop database test +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:test +POSTHOOK: Output: database:test +PREHOOK: query: drop function udf_lower +PREHOOK: type: DROPFUNCTION +PREHOOK: Output: database:default +PREHOOK: Output: default.udf_lower +POSTHOOK: query: drop function udf_lower +POSTHOOK: type: DROPFUNCTION +POSTHOOK: Output: database:default +POSTHOOK: Output: default.udf_lower +PREHOOK: query: drop function udf_upper +PREHOOK: type: DROPFUNCTION +PREHOOK: Output: database:default +PREHOOK: Output: default.udf_upper +POSTHOOK: query: drop function udf_upper +POSTHOOK: type: DROPFUNCTION +POSTHOOK: Output: database:default +POSTHOOK: Output: default.udf_upper +PREHOOK: query: drop view view_using_udf +PREHOOK: type: DROPVIEW +PREHOOK: Input: default@view_using_udf +PREHOOK: Output: default@view_using_udf +POSTHOOK: query: drop view view_using_udf +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: default@view_using_udf +POSTHOOK: Output: default@view_using_udf +PREHOOK: query: drop table base_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@base_table +PREHOOK: Output: default@base_table +POSTHOOK: query: drop table base_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@base_table +POSTHOOK: Output: default@base_table