This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 0b69eda15ff HIVE-28768: Remove hardcoded post exec hooks (Raghav Aggarwal, reviewed by Krisztian Kasa, Zsolt Miskolczi)
0b69eda15ff is described below

commit 0b69eda15ff9824aabaa547b5d6feff04aa83c95
Author: Raghav Aggarwal <[email protected]>
AuthorDate: Thu Feb 27 15:40:57 2025 +0530

    HIVE-28768: Remove hardcoded post exec hooks (Raghav Aggarwal, reviewed by Krisztian Kasa, Zsolt Miskolczi)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java  | 10 +++++++++-
 data/conf/hive-site.xml                             |  6 ++++++
 data/conf/iceberg/llap/hive-site.xml                |  6 ++++++
 data/conf/iceberg/tez/hive-site.xml                 |  6 ++++++
 data/conf/llap/hive-site.xml                        |  6 ++++++
 data/conf/mr/hive-site.xml                          |  6 ++++++
 data/conf/rlist/hive-site.xml                       |  6 ++++++
 data/conf/tez/hive-site.xml                         |  6 ++++++
 .../hive-blobstore/src/test/resources/hive-site.xml |  6 ++++++
 .../apache/hadoop/hive/ql/optimizer/Optimizer.java  | 14 ++------------
 .../hadoop/hive/ql/optimizer/lineage/Generator.java |  2 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java      | 21 +++++++--------------
 12 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 2b8901f6043..31dc00f4019 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -64,6 +64,7 @@
 import java.nio.charset.StandardCharsets;
 import java.time.ZoneId;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
@@ -3880,7 +3881,7 @@ public static enum ConfVars {
     HIVE_LINEAGE_INFO("hive.lineage.hook.info.enabled", false,
         "Whether Hive provides lineage information to hooks." +
             "Deprecated: use hive.lineage.statement.filter instead."),
-    HIVE_LINEAGE_STATEMENT_FILTER("hive.lineage.statement.filter", "ALL",
+    HIVE_LINEAGE_STATEMENT_FILTER("hive.lineage.statement.filter", "NONE",
         "Whether Hive provides lineage information to hooks for the specified statements only, " +
             "the value is a comma-separated list (ex.: CREATE_MATERIALIZED_VIEW," +
             "CREATE_TABLE,CREATE_TABLE_AS_SELECT). Possible values are: CREATE_TABLE, CREATE_TABLE_AS_SELECT, " +
@@ -7303,6 +7304,13 @@ public Map<String, String> subtree(String string) {
     return ret;
   }
 
+  public static boolean shouldComputeLineage(HiveConf conf) {
+    Collection<String> lineageFilter =
+      conf.getTrimmedStringCollection(HiveConf.ConfVars.HIVE_LINEAGE_STATEMENT_FILTER.varname);
+    return !(lineageFilter.isEmpty() || lineageFilter.contains("NONE"))
+      || conf.getBoolVar(ConfVars.HIVE_LINEAGE_INFO);
+  }
+
   // sync all configs from given conf
   public void syncFromConf(HiveConf conf) {
     Iterator<Map.Entry<String, String>> iter = conf.iterator();
diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index 5d858c6840f..94f9d036542 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -154,6 +154,12 @@
   <description>Pre Execute Hook for Tests</description>
 </property>
 
+<property>
+  <name>hive.lineage.statement.filter</name>
+  <value>ALL</value>
+  <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
 <property>
   <name>hive.exec.post.hooks</name>
   <value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
diff --git a/data/conf/iceberg/llap/hive-site.xml b/data/conf/iceberg/llap/hive-site.xml
index 4b0ca9a6402..545a8b82461 100644
--- a/data/conf/iceberg/llap/hive-site.xml
+++ b/data/conf/iceberg/llap/hive-site.xml
@@ -154,6 +154,12 @@
         <description>Pre Execute Hook for Tests</description>
     </property>
 
+    <property>
+      <name>hive.lineage.statement.filter</name>
+      <value>ALL</value>
+      <description>Specify the types of statements for which column lineage information is generated</description>
+    </property>
+
     <property>
         <name>hive.exec.post.hooks</name>
         <value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
diff --git a/data/conf/iceberg/tez/hive-site.xml b/data/conf/iceberg/tez/hive-site.xml
index 370d36af29a..c122c49e514 100644
--- a/data/conf/iceberg/tez/hive-site.xml
+++ b/data/conf/iceberg/tez/hive-site.xml
@@ -145,6 +145,12 @@
   <description>Location of the structured hive logs</description>
 </property>
 
+<property>
+  <name>hive.lineage.statement.filter</name>
+  <value>ALL</value>
+  <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
 <property>
   <name>hive.exec.pre.hooks</name>
   <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml
index d89206c7436..d91908a5c2d 100644
--- a/data/conf/llap/hive-site.xml
+++ b/data/conf/llap/hive-site.xml
@@ -160,6 +160,12 @@
   <description>Location of the structured hive logs</description>
 </property>
 
+<property>
+  <name>hive.lineage.statement.filter</name>
+  <value>ALL</value>
+  <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
 <property>
   <name>hive.exec.pre.hooks</name>
   <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
diff --git a/data/conf/mr/hive-site.xml b/data/conf/mr/hive-site.xml
index 3367ef01cab..8cd1d6a0805 100644
--- a/data/conf/mr/hive-site.xml
+++ b/data/conf/mr/hive-site.xml
@@ -139,6 +139,12 @@
   <description>Location of the structured hive logs</description>
 </property>
 
+<property>
+  <name>hive.lineage.statement.filter</name>
+  <value>ALL</value>
+  <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
 <property>
   <name>hive.exec.pre.hooks</name>
   <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables, org.apache.hadoop.hive.ql.hooks.MaterializedViewRegistryPropertiesHook</value>
diff --git a/data/conf/rlist/hive-site.xml b/data/conf/rlist/hive-site.xml
index 9768b759fb3..3d628f3d3d6 100644
--- a/data/conf/rlist/hive-site.xml
+++ b/data/conf/rlist/hive-site.xml
@@ -141,6 +141,12 @@
   <description>Location of the structured hive logs</description>
 </property>
 
+<property>
+  <name>hive.lineage.statement.filter</name>
+  <value>ALL</value>
+  <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
 <property>
   <name>hive.exec.pre.hooks</name>
   <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
index aa1f9147a32..90a0319f32f 100644
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@ -152,6 +152,12 @@
   <description>Pre Execute Hook for Tests</description>
 </property>
 
+<property>
+  <name>hive.lineage.statement.filter</name>
+  <value>ALL</value>
+  <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
 <property>
   <name>hive.exec.post.hooks</name>
   <value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
diff --git a/itests/hive-blobstore/src/test/resources/hive-site.xml b/itests/hive-blobstore/src/test/resources/hive-site.xml
index 76b4fc621d3..52a431799a9 100644
--- a/itests/hive-blobstore/src/test/resources/hive-site.xml
+++ b/itests/hive-blobstore/src/test/resources/hive-site.xml
@@ -152,6 +152,12 @@
     <description>Location of the structured hive logs</description>
   </property>
 
+  <property>
+    <name>hive.lineage.statement.filter</name>
+    <value>ALL</value>
+    <description>Specify the types of statements for which column lineage information is generated.</description>
+  </property>
+
   <property>
     <name>hive.exec.pre.hooks</name>
     <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index c483168d62e..be36ed02667 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -20,8 +20,8 @@
 
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Set;
 
+import static org.apache.hadoop.hive.conf.HiveConf.shouldComputeLineage;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
@@ -43,10 +43,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.base.Splitter;
-import com.google.common.base.Strings;
-import com.google.common.collect.Sets;
-
 /**
  * Implementation of the optimizer.
  */
@@ -72,13 +68,7 @@ public void initialize(HiveConf hiveConf) {
     transformations.add(new HiveOpConverterPostProc());
 
     // Add the transformation that computes the lineage information.
-    Set<String> postExecHooks = Sets.newHashSet(
-      Splitter.on(",").trimResults().omitEmptyStrings().split(
-        Strings.nullToEmpty(HiveConf.getVar(hiveConf, HiveConf.ConfVars.POST_EXEC_HOOKS))));
-    if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_LINEAGE_INFO)
-        || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter")
-        || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger")
-        || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
+    if (shouldComputeLineage(hiveConf)) {
       transformations.add(Generator.fromConf(hiveConf));
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
index 20889eb849a..7075a379907 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
@@ -90,7 +90,7 @@ public static Generator fromConf(HiveConf conf) {
   static Predicate<ParseContext> createFilterPredicateFromConf(Configuration conf) {
     Set<LineageInfoFilter> operations = new HashSet<>();
     boolean noneSpecified = false;
-    for (String valueText : conf.getStringCollection(HiveConf.ConfVars.HIVE_LINEAGE_STATEMENT_FILTER.varname)) {
+    for (String valueText : conf.getTrimmedStringCollection(HiveConf.ConfVars.HIVE_LINEAGE_STATEMENT_FILTER.varname)) {
       LineageInfoFilter enumValue = EnumUtils.getEnumIgnoreCase(LineageInfoFilter.class, valueText);
       if (enumValue == null) {
         throw new EnumConstantNotPresentException(LineageInfoFilter.class, valueText);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 08739d9f5b4..4be3a32b0f0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -25,6 +25,7 @@
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ARCHIVE_ENABLED;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_STATS_DBCLASS;
+import static org.apache.hadoop.hive.conf.HiveConf.shouldComputeLineage;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
@@ -328,7 +329,6 @@
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ReflectionUtils;
 
-import com.google.common.base.Splitter;
 import com.google.common.base.Strings;
 import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.ImmutableMap;
@@ -13261,21 +13261,14 @@ void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticE
     // Set the mapjoin hint if it needs to be disabled.
     pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
 
-    if (forViewCreation) {
+    if (forViewCreation && shouldComputeLineage(conf)) {
       // Generate lineage info if LineageLogger hook is configured.
       // Add the transformation that computes the lineage information.
-      Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults()
-          .omitEmptyStrings()
-          .split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POST_EXEC_HOOKS))));
-      if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter")
-          || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger")
-          || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
-        List<Transform> transformations = new ArrayList<Transform>();
-        transformations.add(new HiveOpConverterPostProc());
-        transformations.add(Generator.fromConf(conf));
-        for (Transform t : transformations) {
-          pCtx = t.transform(pCtx);
-        }
+      List<Transform> transformations = new ArrayList<Transform>();
+      transformations.add(new HiveOpConverterPostProc());
+      transformations.add(Generator.fromConf(conf));
+      for (Transform t : transformations) {
+        pCtx = t.transform(pCtx);
       }
     }
     perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.PARSE_CONTEXT_GENERATION);

Reply via email to