This is an automated email from the ASF dual-hosted git repository.

dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f8f6b167ed3 HIVE-26799: Make authorizations on custom UDFs involved in 
tables/view configurable (Sai Hemanth Gantasala, reviewed by John Sherman, 
Zhihua Deng)
f8f6b167ed3 is described below

commit f8f6b167ed3623f380d6a26903c54a8a98ddd9aa
Author: Sai Hemanth Gantasala 
<68923650+saihemanth-cloud...@users.noreply.github.com>
AuthorDate: Mon Dec 12 17:55:17 2022 -0800

    HIVE-26799: Make authorizations on custom UDFs involved in tables/view 
configurable (Sai Hemanth Gantasala, reviewed by John Sherman, Zhihua Deng)
    
    Closes #3821
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   3 +
 .../hadoop/hive/ql/parse/BaseSemanticAnalyzer.java |  14 ++
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  25 +++
 .../authorization/command/CommandAuthorizerV2.java |  29 ++--
 .../authorization_functions_in_views.q             |  56 +++++++
 .../llap/authorization_functions_in_views.q.out    | 181 +++++++++++++++++++++
 6 files changed, 298 insertions(+), 10 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 44452b970e5..b3b715a81ae 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3599,6 +3599,9 @@ public class HiveConf extends Configuration {
     
HIVE_AUTHORIZATION_TABLES_ON_STORAGEHANDLERS("hive.security.authorization.tables.on.storagehandlers",
 true,
         "Enables authorization on tables with custom storage handlers as 
implemented by HIVE-24705. " +
         "Default setting is true. Useful for turning the feature off if the 
corresponding ranger patch is missing."),
+    
HIVE_AUTHORIZATION_FUNCTIONS_IN_VIEW("hive.security.authorization.functions.in.view",
 true,
+            "Enable authorization on functions/udfs used within a TABLE/VIEW 
during a select query. Default " +
+         "setting is true. When set to false, the UDF used in the table/view 
will not be authorized during the select query"),
 
     // if this is not set default value is set during config initialization
     // Default value can't be set in this constructor as it would refer names 
in other ConfVars
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 530b41a7056..b8e1e49386c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -142,6 +142,9 @@ public abstract class BaseSemanticAnalyzer {
   protected QueryProperties queryProperties;
   ParseContext pCtx = null;
 
+  //user defined functions in query
+  protected Set<String> userSuppliedFunctions;
+
   /**
    * A set of FileSinkOperators being written to in an ACID compliant way.  We 
need to remember
    * them here because when we build them we don't yet know the write id.  We 
need to go
@@ -286,6 +289,7 @@ public abstract class BaseSemanticAnalyzer {
       inputs = new LinkedHashSet<ReadEntity>();
       outputs = new LinkedHashSet<WriteEntity>();
       txnManager = queryState.getTxnManager();
+      userSuppliedFunctions = new HashSet<>();
     } catch (Exception e) {
       throw new SemanticException(e);
     }
@@ -1443,6 +1447,16 @@ public abstract class BaseSemanticAnalyzer {
     this.updateColumnAccessInfo = updateColumnAccessInfo;
   }
 
+  /**
+   * Gets the user supplied functions.
+   * Note 1: This set only accumulates UDFs explicitly mentioned in the query.
+   * Note 2: This set will not include UDFs referenced inside views/tables.
+   * @return Set of String with names of UDFs.
+   */
+  public Set<String> getUserSuppliedFunctions() {
+    return userSuppliedFunctions;
+  }
+
   /**
    * Checks if given specification is proper specification for prefix of
    * partition cols, for table partitioned by ds, hr, min valid ones are
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 75c62ca6008..57702036cdb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -136,6 +136,7 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.FunctionUtils;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
@@ -12539,6 +12540,26 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     }
   }
 
+  void gatherUserSuppliedFunctions(ASTNode ast) throws SemanticException {
+    int tokenType = ast.getToken().getType();
+    if (tokenType == HiveParser.TOK_FUNCTION ||
+            tokenType == HiveParser.TOK_FUNCTIONDI ||
+            tokenType == HiveParser.TOK_FUNCTIONSTAR) {
+      if (ast.getChild(0).getType() == HiveParser.Identifier) {
+        try {
+          String functionName = 
unescapeIdentifier(ast.getChild(0).getText()).toLowerCase();
+          String[] qualifiedFunctionName = 
FunctionUtils.getQualifiedFunctionNameParts(functionName);
+          
this.userSuppliedFunctions.add(qualifiedFunctionName[0]+"."+qualifiedFunctionName[1]);
+        } catch (HiveException ex) {
+          throw new SemanticException(ex.getMessage(), ex);
+        }
+      }
+    }
+    for (int i = 0; i < ast.getChildCount();i++) {
+      gatherUserSuppliedFunctions((ASTNode) ast.getChild(i));
+    }
+  }
+
   boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws 
SemanticException {
     ASTNode child = ast;
     this.ast = ast;
@@ -12601,6 +12622,10 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     // the basic idea is similar to unparseTranslator.
     tableMask = new TableMask(this, conf, ctx.isSkipTableMasking());
 
+    // Gather UDFs referenced in query before VIEW expansion. This is used to
+    // determine if authorization checks need to occur on the UDFs.
+    gatherUserSuppliedFunctions(child);
+
     // 4. continue analyzing from the child ASTNode.
     Phase1Ctx ctx_1 = initPhase1Ctx();
     if (!doPhase1(child, qb, ctx_1, plannerCtx)) {
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
index 13281980cc1..27fbe94f38d 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
@@ -72,8 +72,8 @@ final class CommandAuthorizerV2 {
     List<WriteEntity> outputList = new ArrayList<WriteEntity>(outputs);
     addPermanentFunctionEntities(ss, inputList);
 
-    List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputList, 
selectTab2Cols, hiveOpType);
-    List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputList, 
updateTab2Cols, hiveOpType);
+    List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputList, 
selectTab2Cols, hiveOpType, sem);
+    List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputList, 
updateTab2Cols, hiveOpType, sem);
 
     HiveAuthzContext.Builder authzContextBuilder = new 
HiveAuthzContext.Builder();
     authzContextBuilder.setUserIpAddress(ss.getUserIpAddress());
@@ -98,13 +98,13 @@ final class CommandAuthorizerV2 {
   }
 
   private static List<HivePrivilegeObject> getHivePrivObjects(List<? extends 
Entity> privObjects,
-      Map<String, List<String>> tableName2Cols, HiveOperationType hiveOpType) 
throws HiveException {
+      Map<String, List<String>> tableName2Cols, HiveOperationType hiveOpType, 
BaseSemanticAnalyzer sem) throws HiveException {
     List<HivePrivilegeObject> hivePrivobjs = new 
ArrayList<HivePrivilegeObject>();
-    if (privObjects == null){
+    if (privObjects == null) {
       return hivePrivobjs;
     }
 
-    for (Entity privObject : privObjects){
+    for (Entity privObject : privObjects) {
       if (privObject.isDummy()) {
         //do not authorize dummy readEntity or writeEntity
         continue;
@@ -114,19 +114,19 @@ final class CommandAuthorizerV2 {
         // it's not inside a deferred authorized view.
         ReadEntity reTable = (ReadEntity)privObject;
         Boolean isDeferred = false;
-        if( reTable.getParents() != null && reTable.getParents().size() > 0){
-          for( ReadEntity re: reTable.getParents()){
+        if ( reTable.getParents() != null && reTable.getParents().size() > 0) {
+          for ( ReadEntity re: reTable.getParents()){
             if (re.getTyp() == Type.TABLE && re.getTable() != null ) {
               Table t = re.getTable();
-              if(!isDeferredAuthView(t)){
+              if (!isDeferredAuthView(t)) {
                 continue;
-              }else{
+              } else {
                 isDeferred = true;
               }
             }
           }
         }
-        if(!isDeferred){
+        if (!isDeferred) {
           continue;
         }
       }
@@ -139,6 +139,15 @@ final class CommandAuthorizerV2 {
         continue;
       }
 
+      if (privObject.getTyp() == Type.FUNCTION && 
!HiveConf.getBoolVar(SessionState.get().getConf(),
+              HiveConf.ConfVars.HIVE_AUTHORIZATION_FUNCTIONS_IN_VIEW) && 
hiveOpType == HiveOperationType.QUERY) {
+        String[] qualifiedFunctionName = new String[]{privObject.getDatabase() 
!= null ?
+                privObject.getDatabase().getName() :  null, 
privObject.getFunctionName()};
+        if (!sem.getUserSuppliedFunctions().contains(qualifiedFunctionName[0] 
+ "." + qualifiedFunctionName[1])) {
+          continue;
+        }
+      }
+
       addHivePrivObject(privObject, tableName2Cols, hivePrivobjs, hiveOpType);
     }
     return hivePrivobjs;
diff --git 
a/ql/src/test/queries/clientpositive/authorization_functions_in_views.q 
b/ql/src/test/queries/clientpositive/authorization_functions_in_views.q
new file mode 100644
index 00000000000..7bdf4a76baa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/authorization_functions_in_views.q
@@ -0,0 +1,56 @@
+--! qt:authorizer
+set user.name=hive_admin_user;
+
+-- admin required for create function
+set role ADMIN;
+
+set 
hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider;
+set hive.security.authorization.enabled=true;
+
+drop function if exists udf_upper;
+
+drop function if exists udf_lower;
+
+drop table if exists base_table;
+
+drop view if exists view_using_udf;
+
+drop database if exists test;
+
+create function udf_upper as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper';
+
+create table base_table(city string);
+
+create view view_using_udf as select udf_upper(city) as upper_city from 
base_table;
+
+select * from view_using_udf;
+
+create function udf_lower as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower';
+
+select udf_lower(upper_city) from view_using_udf;
+
+create database test;
+
+create function test.UDF_upper as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper';
+
+select test.UDF_Upper(upper_city) from view_using_udf;
+
+set hive.security.authorization.functions.in.view=false;
+
+select * from view_using_udf;
+
+select udf_lower(upper_city) from view_using_udf;
+
+select test.UDF_Upper(upper_city) from view_using_udf;
+
+drop function test.UDF_Upper;
+
+drop database test;
+
+drop function udf_lower;
+
+drop function udf_upper;
+
+drop view view_using_udf;
+
+drop table base_table;
\ No newline at end of file
diff --git 
a/ql/src/test/results/clientpositive/llap/authorization_functions_in_views.q.out
 
b/ql/src/test/results/clientpositive/llap/authorization_functions_in_views.q.out
new file mode 100644
index 00000000000..4094871cab2
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/authorization_functions_in_views.q.out
@@ -0,0 +1,181 @@
+PREHOOK: query: set role ADMIN
+PREHOOK: type: SHOW_ROLES
+POSTHOOK: query: set role ADMIN
+POSTHOOK: type: SHOW_ROLES
+PREHOOK: query: drop function if exists udf_upper
+PREHOOK: type: DROPFUNCTION
+POSTHOOK: query: drop function if exists udf_upper
+POSTHOOK: type: DROPFUNCTION
+PREHOOK: query: drop function if exists udf_lower
+PREHOOK: type: DROPFUNCTION
+POSTHOOK: query: drop function if exists udf_lower
+POSTHOOK: type: DROPFUNCTION
+PREHOOK: query: drop table if exists base_table
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists base_table
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop view if exists view_using_udf
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view if exists view_using_udf
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: drop database if exists test
+PREHOOK: type: DROPDATABASE
+POSTHOOK: query: drop database if exists test
+POSTHOOK: type: DROPDATABASE
+PREHOOK: query: create function udf_upper as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: database:default
+PREHOOK: Output: default.udf_upper
+POSTHOOK: query: create function udf_upper as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default.udf_upper
+PREHOOK: query: create table base_table(city string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@base_table
+POSTHOOK: query: create table base_table(city string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@base_table
+PREHOOK: query: create view view_using_udf as select udf_upper(city) as 
upper_city from base_table
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@base_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@view_using_udf
+POSTHOOK: query: create view view_using_udf as select udf_upper(city) as 
upper_city from base_table
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@base_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@view_using_udf
+POSTHOOK: Lineage: view_using_udf.upper_city EXPRESSION 
[(base_table)base_table.FieldSchema(name:city, type:string, comment:null), ]
+PREHOOK: query: select * from view_using_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@base_table
+PREHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select * from view_using_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@base_table
+POSTHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+PREHOOK: query: create function udf_lower as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: database:default
+PREHOOK: Output: default.udf_lower
+POSTHOOK: query: create function udf_lower as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default.udf_lower
+PREHOOK: query: select udf_lower(upper_city) from view_using_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@base_table
+PREHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select udf_lower(upper_city) from view_using_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@base_table
+POSTHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+PREHOOK: query: create database test
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:test
+POSTHOOK: query: create database test
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:test
+PREHOOK: query: create function test.UDF_upper as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: database:test
+PREHOOK: Output: test.udf_upper
+POSTHOOK: query: create function test.UDF_upper as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: database:test
+POSTHOOK: Output: test.udf_upper
+PREHOOK: query: select test.UDF_Upper(upper_city) from view_using_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@base_table
+PREHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select test.UDF_Upper(upper_city) from view_using_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@base_table
+POSTHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+PREHOOK: query: select * from view_using_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@base_table
+PREHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select * from view_using_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@base_table
+POSTHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+PREHOOK: query: select udf_lower(upper_city) from view_using_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@base_table
+PREHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select udf_lower(upper_city) from view_using_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@base_table
+POSTHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+PREHOOK: query: select test.UDF_Upper(upper_city) from view_using_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@base_table
+PREHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select test.UDF_Upper(upper_city) from view_using_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@base_table
+POSTHOOK: Input: default@view_using_udf
+#### A masked pattern was here ####
+PREHOOK: query: drop function test.UDF_Upper
+PREHOOK: type: DROPFUNCTION
+PREHOOK: Output: database:test
+PREHOOK: Output: test.UDF_Upper
+POSTHOOK: query: drop function test.UDF_Upper
+POSTHOOK: type: DROPFUNCTION
+POSTHOOK: Output: database:test
+POSTHOOK: Output: test.UDF_Upper
+PREHOOK: query: drop database test
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:test
+PREHOOK: Output: database:test
+POSTHOOK: query: drop database test
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:test
+POSTHOOK: Output: database:test
+PREHOOK: query: drop function udf_lower
+PREHOOK: type: DROPFUNCTION
+PREHOOK: Output: database:default
+PREHOOK: Output: default.udf_lower
+POSTHOOK: query: drop function udf_lower
+POSTHOOK: type: DROPFUNCTION
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default.udf_lower
+PREHOOK: query: drop function udf_upper
+PREHOOK: type: DROPFUNCTION
+PREHOOK: Output: database:default
+PREHOOK: Output: default.udf_upper
+POSTHOOK: query: drop function udf_upper
+POSTHOOK: type: DROPFUNCTION
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default.udf_upper
+PREHOOK: query: drop view view_using_udf
+PREHOOK: type: DROPVIEW
+PREHOOK: Input: default@view_using_udf
+PREHOOK: Output: default@view_using_udf
+POSTHOOK: query: drop view view_using_udf
+POSTHOOK: type: DROPVIEW
+POSTHOOK: Input: default@view_using_udf
+POSTHOOK: Output: default@view_using_udf
+PREHOOK: query: drop table base_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@base_table
+PREHOOK: Output: default@base_table
+POSTHOOK: query: drop table base_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@base_table
+POSTHOOK: Output: default@base_table

Reply via email to