[hive] branch master updated: HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843)
This is an automated email from the ASF dual-hosted git repository. pvary pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 2bdf0cc HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843) 2bdf0cc is described below commit 2bdf0ccb94f9555dd0c06131a7fb5defcf8010ed Author: hsnusonic AuthorDate: Fri Dec 10 06:16:04 2021 -0800 HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843) --- .../hadoop/hive/metastore/MetaStoreDirectSql.java | 27 +++ .../hadoop/hive/metastore/TestObjectStore.java | 38 -- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index d28e630..b200608 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -29,12 +29,15 @@ import java.sql.Statement; import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeMap; import java.util.stream.Collectors; @@ -151,7 +154,7 @@ class MetaStoreDirectSql { * @return The concatenated list * @throws MetaException If the list contains wrong data */ - public static String getIdListForIn(List objectIds) throws MetaException { + public static String getIdListForIn(Collection objectIds) throws MetaException { 
return objectIds.stream() .map(i -> i.toString()) .collect(Collectors.joining(",")); @@ -2622,7 +2625,7 @@ class MetaStoreDirectSql { + "WHERE " + PARTITIONS + ".\"PART_ID\" in (" + partitionIds + ")"; List sdIdList = new ArrayList<>(partitionIdList.size()); -List columnDescriptorIdList = new ArrayList<>(1); +List columnDescriptorIdList = new ArrayList<>(1); List serdeIdList = new ArrayList<>(partitionIdList.size()); try (QueryWrapper query = new QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) { List sqlResult = MetastoreDirectSqlUtils @@ -2808,7 +2811,7 @@ class MetaStoreDirectSql { * @throws MetaException If there is an SQL exception during the execution it converted to * MetaException */ - private void dropDanglingColumnDescriptors(List columnDescriptorIdList) + private void dropDanglingColumnDescriptors(List columnDescriptorIdList) throws MetaException { if (columnDescriptorIdList.isEmpty()) { return; @@ -2818,26 +2821,24 @@ class MetaStoreDirectSql { // Drop column descriptor, if no relation left queryText = -"SELECT " + SDS + ".\"CD_ID\", count(1) " +"SELECT " + SDS + ".\"CD_ID\" " + "from " + SDS + " " + "WHERE " + SDS + ".\"CD_ID\" in (" + colIds + ") " + "GROUP BY " + SDS + ".\"CD_ID\""; -List danglingColumnDescriptorIdList = new ArrayList<>(columnDescriptorIdList.size()); +Set danglingColumnDescriptorIdSet = new HashSet<>(columnDescriptorIdList); try (QueryWrapper query = new QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) { - List sqlResult = MetastoreDirectSqlUtils - .ensureList(executeWithArray(query, null, queryText)); + List sqlResult = executeWithArray(query, null, queryText); if (!sqlResult.isEmpty()) { -for (Object[] fields : sqlResult) { - if (MetastoreDirectSqlUtils.extractSqlInt(fields[1]) == 0) { - danglingColumnDescriptorIdList.add(MetastoreDirectSqlUtils.extractSqlLong(fields[0])); - } +for (Long cdId : sqlResult) { + // the returned CD is not dangling, so remove it from the list + 
danglingColumnDescriptorIdSet.remove(cdId); } } } -if (!danglingColumnDescriptorIdList.isEmpty()) { +if (!danglingColumnDescriptorIdSet.isEmpty()) { try { -String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdList); +String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdSet); // Drop the columns_v2 queryText = "delete from " + COLUMNS_V2 + " where \"CD_ID\" in (" + danglingCDIds + ")"; diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java b/standalone-metastore/metastore-server/src
[hive] branch master updated (f2d29ef -> f7448be)
This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from f2d29ef HIVE-25768: Extend lifetime of query-level HMS response cache (John Sherman, reviewed by Stephen Carlin, Krisztian Kasa) add f7448be HIVE-25780: DistinctExpansion creates more than 64 grouping sets II (#2849) (Zoltan Haindrich reviewed by Krisztian Kasa) No new revisions were added by this update. Summary of changes: .../test/resources/testconfiguration.properties | 3 +- .../rules/HiveExpandDistinctAggregatesRule.java | 20 +- .../queries/clientpositive/multi_count_distinct.q | 13 + .../{tez => llap}/multi_count_distinct.q.out | 345 ++--- 4 files changed, 266 insertions(+), 115 deletions(-) rename ql/src/test/results/clientpositive/{tez => llap}/multi_count_distinct.q.out (83%)
[hive] branch master updated: HIVE-25768: Extend lifetime of query-level HMS response cache (John Sherman, reviewed by Stephen Carlin, Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f2d29ef HIVE-25768: Extend lifetime of query-level HMS response cache (John Sherman, reviewed by Stephen Carlin, Krisztian Kasa) f2d29ef is described below commit f2d29efb557016a04987ab352c2c9609475f84d3 Author: John Sherman AuthorDate: Fri Dec 10 02:52:16 2021 -0800 HIVE-25768: Extend lifetime of query-level HMS response cache (John Sherman, reviewed by Stephen Carlin, Krisztian Kasa) --- .../apache/hadoop/hive/ql/DriverTxnHandler.java| 3 ++ .../java/org/apache/hadoop/hive/ql/Executor.java | 4 +++ .../java/org/apache/hadoop/hive/ql/QueryState.java | 41 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 22 +--- .../hadoop/hive/ql/session/SessionState.java | 29 +++ 5 files changed, 53 insertions(+), 46 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java index a6d3481..09fc767 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java @@ -224,6 +224,8 @@ class DriverTxnHandler { } try { + // Ensure we answer any metadata calls with fresh responses + driverContext.getQueryState().disableHMSCache(); setWriteIdForAcidFileSinks(); allocateWriteIdForAcidAnalyzeTable(); boolean hasAcidDdl = setWriteIdForAcidDdl(); @@ -245,6 +247,7 @@ class DriverTxnHandler { throw DriverUtils.createProcessorException(driverContext, 10, errorMessage, ErrorMsg.findSQLState(e.getMessage()), e); } finally { + driverContext.getQueryState().enableHMSCache(); perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java index ce47d17..6b5ee95 100644 
--- a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java @@ -105,7 +105,10 @@ public class Executor { preExecutionActions(); preExecutionCacheActions(); + // Disable HMS cache so any metadata calls during execution get fresh responses. + driverContext.getQueryState().disableHMSCache(); runTasks(noName); + driverContext.getQueryState().enableHMSCache(); postExecutionCacheActions(); postExecutionActions(); } catch (CommandProcessorException cpe) { @@ -118,6 +121,7 @@ public class Executor { handleException(hookContext, e); } finally { cleanUp(noName, hookContext, executionError); + driverContext.getQueryState().enableHMSCache(); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java index 5edebca..b39037c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java @@ -30,12 +30,17 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.tez.dag.api.TezConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** * The class to store query level info such as queryId. Multiple queries can run * in the same session, so SessionState is to hold common session related info, and * each QueryState is to hold query related info. */ public class QueryState { + private static final Logger LOG = LoggerFactory.getLogger(QueryState.class); + /** * current configuration. */ @@ -68,6 +73,17 @@ public class QueryState { private final Map resourceMap = new HashMap<>(); /** + * Cache of HMS requests/responses utilized by SessionHiveMetaStoreClient. + */ + private Map hmsCache; + + /** + * Tracks if HMS cache should be used to answer metadata requests. + * In some sections, it makes sense to disable the cache to get fresh responses. 
+ */ + private boolean hmsCacheEnabled; + + /** * query level lock for ConditionalTask#resolveTask. */ private final ReentrantLock resolveConditionalTaskLock = new ReentrantLock(true); @@ -82,13 +98,36 @@ public class QueryState { // Get the query id stored in query specific config. public String getQueryId() { -return (queryConf.getVar(HiveConf.ConfVars.HIVEQUERYID)); +return queryConf.getVar(HiveConf.ConfVars.HIVEQUERYID); } public String getQueryString() { return queryConf.getQueryString(); } + // Returns the HMS cache if it is currently enabled + public Map getHMSCache() { +return hmsCacheEnabled ? hmsCache : null; + } + + /** + * Disable the HMS cache. Useful in situations when you + * mus