[hive] branch master updated: HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843)

2021-12-10 Thread pvary
This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 2bdf0cc  HIVE-25773: Column descriptors might not be deleted via 
direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843)
2bdf0cc is described below

commit 2bdf0ccb94f9555dd0c06131a7fb5defcf8010ed
Author: hsnusonic 
AuthorDate: Fri Dec 10 06:16:04 2021 -0800

HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen 
Lai reviewed by Peter Vary) (#2843)
---
 .../hadoop/hive/metastore/MetaStoreDirectSql.java  | 27 +++
 .../hadoop/hive/metastore/TestObjectStore.java | 38 --
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index d28e630..b200608 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -29,12 +29,15 @@ import java.sql.Statement;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.stream.Collectors;
 
@@ -151,7 +154,7 @@ class MetaStoreDirectSql {
* @return The concatenated list
* @throws MetaException If the list contains wrong data
*/
-  public static <T> String getIdListForIn(List<T> objectIds) throws
MetaException {
+  public static <T> String getIdListForIn(Collection<T> objectIds) throws
MetaException {
 return objectIds.stream()
.map(i -> i.toString())
.collect(Collectors.joining(","));
@@ -2622,7 +2625,7 @@ class MetaStoreDirectSql {
 + "WHERE " + PARTITIONS + ".\"PART_ID\" in (" + partitionIds + ")";
 
 List<Long> sdIdList = new ArrayList<>(partitionIdList.size());
-List<Object> columnDescriptorIdList = new ArrayList<>(1);
+List<Long> columnDescriptorIdList = new ArrayList<>(1);
 List<Long> serdeIdList = new ArrayList<>(partitionIdList.size());
 try (QueryWrapper query = new 
QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
   List<Object[]> sqlResult = MetastoreDirectSqlUtils
@@ -2808,7 +2811,7 @@ class MetaStoreDirectSql {
* @throws MetaException If there is an SQL exception during the execution 
it converted to
* MetaException
*/
-  private void dropDanglingColumnDescriptors(List<Object>
columnDescriptorIdList)
+  private void dropDanglingColumnDescriptors(List<Long> columnDescriptorIdList)
   throws MetaException {
 if (columnDescriptorIdList.isEmpty()) {
   return;
@@ -2818,26 +2821,24 @@ class MetaStoreDirectSql {
 
 // Drop column descriptor, if no relation left
 queryText =
-"SELECT " + SDS + ".\"CD_ID\", count(1) "
+"SELECT " + SDS + ".\"CD_ID\" "
 + "from " + SDS + " "
 + "WHERE " + SDS + ".\"CD_ID\" in (" + colIds + ") "
 + "GROUP BY " + SDS + ".\"CD_ID\"";
-List<Long> danglingColumnDescriptorIdList = new
ArrayList<>(columnDescriptorIdList.size());
+Set<Long> danglingColumnDescriptorIdSet = new
HashSet<>(columnDescriptorIdList);
 try (QueryWrapper query = new 
QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
-  List<Object[]> sqlResult = MetastoreDirectSqlUtils
-  .ensureList(executeWithArray(query, null, queryText));
+  List<Long> sqlResult = executeWithArray(query, null, queryText);
 
   if (!sqlResult.isEmpty()) {
-for (Object[] fields : sqlResult) {
-  if (MetastoreDirectSqlUtils.extractSqlInt(fields[1]) == 0) {
-
danglingColumnDescriptorIdList.add(MetastoreDirectSqlUtils.extractSqlLong(fields[0]));
-  }
+for (Long cdId : sqlResult) {
+  // the returned CD is not dangling, so remove it from the list
+  danglingColumnDescriptorIdSet.remove(cdId);
 }
   }
 }
-if (!danglingColumnDescriptorIdList.isEmpty()) {
+if (!danglingColumnDescriptorIdSet.isEmpty()) {
   try {
-String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdList);
+String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdSet);
 
 // Drop the columns_v2
 queryText = "delete from " + COLUMNS_V2 + " where \"CD_ID\" in (" + 
danglingCDIds + ")";
diff --git 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
 

[hive] branch master updated (f2d29ef -> f7448be)

2021-12-10 Thread kgyrtkirk
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


from f2d29ef  HIVE-25768: Extend lifetime of query-level HMS response cache 
(John Sherman, reviewed by Stephen Carlin, Krisztian Kasa)
 add f7448be  HIVE-25780: DistinctExpansion creates more than 64 grouping 
sets II (#2849) (Zoltan Haindrich reviewed by Krisztian Kasa)

No new revisions were added by this update.

Summary of changes:
 .../test/resources/testconfiguration.properties|   3 +-
 .../rules/HiveExpandDistinctAggregatesRule.java|  20 +-
 .../queries/clientpositive/multi_count_distinct.q  |  13 +
 .../{tez => llap}/multi_count_distinct.q.out   | 345 ++---
 4 files changed, 266 insertions(+), 115 deletions(-)
 rename ql/src/test/results/clientpositive/{tez => 
llap}/multi_count_distinct.q.out (83%)


[hive] branch master updated: HIVE-25768: Extend lifetime of query-level HMS response cache (John Sherman, reviewed by Stephen Carlin, Krisztian Kasa)

2021-12-10 Thread krisztiankasa
This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f2d29ef  HIVE-25768: Extend lifetime of query-level HMS response cache 
(John Sherman, reviewed by Stephen Carlin, Krisztian Kasa)
f2d29ef is described below

commit f2d29efb557016a04987ab352c2c9609475f84d3
Author: John Sherman 
AuthorDate: Fri Dec 10 02:52:16 2021 -0800

HIVE-25768: Extend lifetime of query-level HMS response cache (John 
Sherman, reviewed by Stephen Carlin, Krisztian Kasa)
---
 .../apache/hadoop/hive/ql/DriverTxnHandler.java|  3 ++
 .../java/org/apache/hadoop/hive/ql/Executor.java   |  4 +++
 .../java/org/apache/hadoop/hive/ql/QueryState.java | 41 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 22 +---
 .../hadoop/hive/ql/session/SessionState.java   | 29 +++
 5 files changed, 53 insertions(+), 46 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java 
b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
index a6d3481..09fc767 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
@@ -224,6 +224,8 @@ class DriverTxnHandler {
 }
 
 try {
+  // Ensure we answer any metadata calls with fresh responses
+  driverContext.getQueryState().disableHMSCache();
   setWriteIdForAcidFileSinks();
   allocateWriteIdForAcidAnalyzeTable();
   boolean hasAcidDdl = setWriteIdForAcidDdl();
@@ -245,6 +247,7 @@ class DriverTxnHandler {
   throw DriverUtils.createProcessorException(driverContext, 10, 
errorMessage, ErrorMsg.findSQLState(e.getMessage()),
   e);
 } finally {
+  driverContext.getQueryState().enableHMSCache();
   perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
 }
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
index ce47d17..6b5ee95 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
@@ -105,7 +105,10 @@ public class Executor {
 
   preExecutionActions();
   preExecutionCacheActions();
+  // Disable HMS cache so any metadata calls during execution get fresh 
responses.
+  driverContext.getQueryState().disableHMSCache();
   runTasks(noName);
+  driverContext.getQueryState().enableHMSCache();
   postExecutionCacheActions();
   postExecutionActions();
 } catch (CommandProcessorException cpe) {
@@ -118,6 +121,7 @@ public class Executor {
   handleException(hookContext, e);
 } finally {
   cleanUp(noName, hookContext, executionError);
+  driverContext.getQueryState().enableHMSCache();
 }
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java 
b/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java
index 5edebca..b39037c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryState.java
@@ -30,12 +30,17 @@ import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.tez.dag.api.TezConfiguration;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 /**
  * The class to store query level info such as queryId. Multiple queries can 
run
  * in the same session, so SessionState is to hold common session related 
info, and
  * each QueryState is to hold query related info.
  */
 public class QueryState {
+  private static final Logger LOG = LoggerFactory.getLogger(QueryState.class);
+
   /**
* current configuration.
*/
@@ -68,6 +73,17 @@ public class QueryState {
  private final Map<String, Object> resourceMap = new HashMap<>();
 
   /**
+   * Cache of HMS requests/responses utilized by SessionHiveMetaStoreClient.
+   */
+  private Map hmsCache;
+
+  /**
+   * Tracks if HMS cache should be used to answer metadata requests.
+   * In some sections, it makes sense to disable the cache to get fresh 
responses.
+   */
+  private boolean hmsCacheEnabled;
+
+  /**
* query level lock for ConditionalTask#resolveTask.
*/
   private final ReentrantLock resolveConditionalTaskLock = new 
ReentrantLock(true);
@@ -82,13 +98,36 @@ public class QueryState {
 
   // Get the query id stored in query specific config.
   public String getQueryId() {
-return (queryConf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+return queryConf.getVar(HiveConf.ConfVars.HIVEQUERYID);
   }
 
   public String getQueryString() {
 return queryConf.getQueryString();
   }
 
+  // Returns the HMS cache if it is currently enabled
+  public Map getHMSCache() {
+return hmsCacheEnabled ? hmsCache : null;
+  }
+
+  /**
+   * Disable the HMS cache. Useful in situations when you
+   *