This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 0b69eda15ff HIVE-28768: Remove hardcoded post exec hooks (Raghav Aggarwal, reviewed by Krisztian Kasa, Zsolt Miskolczi)
0b69eda15ff is described below
commit 0b69eda15ff9824aabaa547b5d6feff04aa83c95
Author: Raghav Aggarwal <[email protected]>
AuthorDate: Thu Feb 27 15:40:57 2025 +0530
HIVE-28768: Remove hardcoded post exec hooks (Raghav Aggarwal, reviewed by Krisztian Kasa, Zsolt Miskolczi)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 10 +++++++++-
data/conf/hive-site.xml | 6 ++++++
data/conf/iceberg/llap/hive-site.xml | 6 ++++++
data/conf/iceberg/tez/hive-site.xml | 6 ++++++
data/conf/llap/hive-site.xml | 6 ++++++
data/conf/mr/hive-site.xml | 6 ++++++
data/conf/rlist/hive-site.xml | 6 ++++++
data/conf/tez/hive-site.xml | 6 ++++++
.../hive-blobstore/src/test/resources/hive-site.xml | 6 ++++++
.../apache/hadoop/hive/ql/optimizer/Optimizer.java | 14 ++------------
.../hadoop/hive/ql/optimizer/lineage/Generator.java | 2 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 21 +++++++--------------
12 files changed, 67 insertions(+), 28 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 2b8901f6043..31dc00f4019 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -64,6 +64,7 @@
import java.nio.charset.StandardCharsets;
import java.time.ZoneId;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
@@ -3880,7 +3881,7 @@ public static enum ConfVars {
HIVE_LINEAGE_INFO("hive.lineage.hook.info.enabled", false,
"Whether Hive provides lineage information to hooks." +
"Deprecated: use hive.lineage.statement.filter instead."),
- HIVE_LINEAGE_STATEMENT_FILTER("hive.lineage.statement.filter", "ALL",
+ HIVE_LINEAGE_STATEMENT_FILTER("hive.lineage.statement.filter", "NONE",
"Whether Hive provides lineage information to hooks for the specified statements only, " +
"the value is a comma-separated list (ex.: CREATE_MATERIALIZED_VIEW," +
"CREATE_TABLE,CREATE_TABLE_AS_SELECT). Possible values are: CREATE_TABLE, CREATE_TABLE_AS_SELECT, " +
@@ -7303,6 +7304,13 @@ public Map<String, String> subtree(String string) {
return ret;
}
+ public static boolean shouldComputeLineage(HiveConf conf) {
+ Collection<String> lineageFilter =
+ conf.getTrimmedStringCollection(HiveConf.ConfVars.HIVE_LINEAGE_STATEMENT_FILTER.varname);
+ return !(lineageFilter.isEmpty() || lineageFilter.contains("NONE"))
+ || conf.getBoolVar(ConfVars.HIVE_LINEAGE_INFO);
+ }
+
// sync all configs from given conf
public void syncFromConf(HiveConf conf) {
Iterator<Map.Entry<String, String>> iter = conf.iterator();
diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index 5d858c6840f..94f9d036542 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -154,6 +154,12 @@
<description>Pre Execute Hook for Tests</description>
</property>
+<property>
+ <name>hive.lineage.statement.filter</name>
+ <value>ALL</value>
+ <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
diff --git a/data/conf/iceberg/llap/hive-site.xml b/data/conf/iceberg/llap/hive-site.xml
index 4b0ca9a6402..545a8b82461 100644
--- a/data/conf/iceberg/llap/hive-site.xml
+++ b/data/conf/iceberg/llap/hive-site.xml
@@ -154,6 +154,12 @@
<description>Pre Execute Hook for Tests</description>
</property>
+ <property>
+ <name>hive.lineage.statement.filter</name>
+ <value>ALL</value>
+ <description>Specify the types of statements for which column lineage information is generated</description>
+ </property>
+
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
diff --git a/data/conf/iceberg/tez/hive-site.xml b/data/conf/iceberg/tez/hive-site.xml
index 370d36af29a..c122c49e514 100644
--- a/data/conf/iceberg/tez/hive-site.xml
+++ b/data/conf/iceberg/tez/hive-site.xml
@@ -145,6 +145,12 @@
<description>Location of the structured hive logs</description>
</property>
+<property>
+ <name>hive.lineage.statement.filter</name>
+ <value>ALL</value>
+ <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,
org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml
index d89206c7436..d91908a5c2d 100644
--- a/data/conf/llap/hive-site.xml
+++ b/data/conf/llap/hive-site.xml
@@ -160,6 +160,12 @@
<description>Location of the structured hive logs</description>
</property>
+<property>
+ <name>hive.lineage.statement.filter</name>
+ <value>ALL</value>
+ <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,
org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
diff --git a/data/conf/mr/hive-site.xml b/data/conf/mr/hive-site.xml
index 3367ef01cab..8cd1d6a0805 100644
--- a/data/conf/mr/hive-site.xml
+++ b/data/conf/mr/hive-site.xml
@@ -139,6 +139,12 @@
<description>Location of the structured hive logs</description>
</property>
+<property>
+ <name>hive.lineage.statement.filter</name>
+ <value>ALL</value>
+ <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,
org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables,
org.apache.hadoop.hive.ql.hooks.MaterializedViewRegistryPropertiesHook</value>
diff --git a/data/conf/rlist/hive-site.xml b/data/conf/rlist/hive-site.xml
index 9768b759fb3..3d628f3d3d6 100644
--- a/data/conf/rlist/hive-site.xml
+++ b/data/conf/rlist/hive-site.xml
@@ -141,6 +141,12 @@
<description>Location of the structured hive logs</description>
</property>
+<property>
+ <name>hive.lineage.statement.filter</name>
+ <value>ALL</value>
+ <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,
org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
index aa1f9147a32..90a0319f32f 100644
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@ -152,6 +152,12 @@
<description>Pre Execute Hook for Tests</description>
</property>
+<property>
+ <name>hive.lineage.statement.filter</name>
+ <value>ALL</value>
+ <description>Specify the types of statements for which column lineage information is generated</description>
+</property>
+
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
diff --git a/itests/hive-blobstore/src/test/resources/hive-site.xml b/itests/hive-blobstore/src/test/resources/hive-site.xml
index 76b4fc621d3..52a431799a9 100644
--- a/itests/hive-blobstore/src/test/resources/hive-site.xml
+++ b/itests/hive-blobstore/src/test/resources/hive-site.xml
@@ -152,6 +152,12 @@
<description>Location of the structured hive logs</description>
</property>
+ <property>
+ <name>hive.lineage.statement.filter</name>
+ <value>ALL</value>
+ <description>Specify the types of statements for which column lineage information is generated.</description>
+ </property>
+
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,
org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index c483168d62e..be36ed02667 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -20,8 +20,8 @@
import java.util.ArrayList;
import java.util.List;
-import java.util.Set;
+import static org.apache.hadoop.hive.conf.HiveConf.shouldComputeLineage;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import
org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
@@ -43,10 +43,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Splitter;
-import com.google.common.base.Strings;
-import com.google.common.collect.Sets;
-
/**
* Implementation of the optimizer.
*/
@@ -72,13 +68,7 @@ public void initialize(HiveConf hiveConf) {
transformations.add(new HiveOpConverterPostProc());
// Add the transformation that computes the lineage information.
- Set<String> postExecHooks = Sets.newHashSet(
- Splitter.on(",").trimResults().omitEmptyStrings().split(
- Strings.nullToEmpty(HiveConf.getVar(hiveConf, HiveConf.ConfVars.POST_EXEC_HOOKS))));
- if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_LINEAGE_INFO)
- || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter")
- || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger")
- || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
+ if (shouldComputeLineage(hiveConf)) {
transformations.add(Generator.fromConf(hiveConf));
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
index 20889eb849a..7075a379907 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
@@ -90,7 +90,7 @@ public static Generator fromConf(HiveConf conf) {
static Predicate<ParseContext> createFilterPredicateFromConf(Configuration
conf) {
Set<LineageInfoFilter> operations = new HashSet<>();
boolean noneSpecified = false;
- for (String valueText : conf.getStringCollection(HiveConf.ConfVars.HIVE_LINEAGE_STATEMENT_FILTER.varname)) {
+ for (String valueText : conf.getTrimmedStringCollection(HiveConf.ConfVars.HIVE_LINEAGE_STATEMENT_FILTER.varname)) {
LineageInfoFilter enumValue =
EnumUtils.getEnumIgnoreCase(LineageInfoFilter.class, valueText);
if (enumValue == null) {
throw new EnumConstantNotPresentException(LineageInfoFilter.class,
valueText);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 08739d9f5b4..4be3a32b0f0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -25,6 +25,7 @@
import static
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ARCHIVE_ENABLED;
import static
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_STATS_DBCLASS;
+import static org.apache.hadoop.hive.conf.HiveConf.shouldComputeLineage;
import static
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION;
import static
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
import static
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
@@ -328,7 +329,6 @@
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
-import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMap;
@@ -13261,21 +13261,14 @@ void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticE
// Set the mapjoin hint if it needs to be disabled.
pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
- if (forViewCreation) {
+ if (forViewCreation && shouldComputeLineage(conf)) {
// Generate lineage info if LineageLogger hook is configured.
// Add the transformation that computes the lineage information.
- Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults()
- .omitEmptyStrings()
- .split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POST_EXEC_HOOKS))));
- if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter")
- || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger")
- || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
- List<Transform> transformations = new ArrayList<Transform>();
- transformations.add(new HiveOpConverterPostProc());
- transformations.add(Generator.fromConf(conf));
- for (Transform t : transformations) {
- pCtx = t.transform(pCtx);
- }
+ List<Transform> transformations = new ArrayList<Transform>();
+ transformations.add(new HiveOpConverterPostProc());
+ transformations.add(Generator.fromConf(conf));
+ for (Transform t : transformations) {
+ pCtx = t.transform(pCtx);
}
}
perfLogger.perfLogEnd(this.getClass().getName(),
PerfLogger.PARSE_CONTEXT_GENERATION);