This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new c90f2f7a805 HIVE-27542: Backport HIVE-25659: Metastore direct sql 
queries with IN/(NOT IN) should be split based on max parameters allowed by SQL 
DB (#4525)
c90f2f7a805 is described below

commit c90f2f7a805cf056978a5f3cc88525d10910dce0
Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com>
AuthorDate: Fri Aug 11 12:37:35 2023 +0530

    HIVE-27542: Backport HIVE-25659: Metastore direct sql queries with IN/(NOT 
IN) should be split based on max parameters allowed by SQL DB (#4525)
    
    Signed-off-by: Sankar Hariappan <sank...@apache.org>
    Closes (#4525)
---
 .../hadoop/hive/metastore/conf/MetastoreConf.java  |  3 +++
 .../apache/hadoop/hive/metastore/txn/TxnUtils.java |  5 ++--
 .../hadoop/hive/metastore/txn/TestTxnUtils.java    | 27 +++++++++++++++++++---
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 322edf10d93..aafd50ae466 100644
--- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -457,6 +457,9 @@ public class MetastoreConf {
     
DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause",
         "hive.direct.sql.max.elements.values.clause",
         1000, "The maximum number of values in a VALUES clause for INSERT 
statement."),
+    DIRECT_SQL_MAX_PARAMETERS("metastore.direct.sql.max.parameters",
+        "hive.direct.sql.max.parameters", 1000, "The maximum query parameters 
\n" +
+            "backend sql engine can support."),
     DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length",
         "hive.direct.sql.max.query.length", 100, "The maximum\n" +
         " size of a query string (in KB)."),
diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
index fa291d5f20a..61701625150 100644
--- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
+++ 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
@@ -329,6 +329,7 @@ public class TxnUtils {
     // Get configuration parameters
     int maxQueryLength = MetastoreConf.getIntVar(conf, 
ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH);
     int batchSize = MetastoreConf.getIntVar(conf, 
ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE);
+    int maxParameters = MetastoreConf.getIntVar(conf, 
ConfVars.DIRECT_SQL_MAX_PARAMETERS);
 
     // Check parameter set validity as a public method.
     if (inList == null || inList.size() == 0 || maxQueryLength <= 0 || 
batchSize <= 0) {
@@ -380,7 +381,7 @@ public class TxnUtils {
       // Compute the size of a query when the 'nextValue' is added to the 
current query.
       int querySize = querySizeExpected(buf.length(), nextValue.length(), 
suffix.length(), addParens);
 
-      if (querySize > maxQueryLength * 1024) {
+      if ((querySize > maxQueryLength * 1024) || (currentCount >= 
maxParameters)) {
         // Check an edge case where the DIRECT_SQL_MAX_QUERY_LENGTH does not 
allow one 'IN' clause with single value.
         if (cursor4queryOfInClauses == 1 && cursor4InClauseElements == 0) {
           throw new IllegalArgumentException("The current " + 
ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH.getVarname() + " is set too small to have 
one IN clause with single value!");
@@ -396,7 +397,7 @@ public class TxnUtils {
           buf.delete(buf.length()-newInclausePrefix.length(), buf.length());
         }
 
-        buf.setCharAt(buf.length() - 1, ')'); // replace the "commar" to 
finish a 'IN' clause string.
+        buf.setCharAt(buf.length() - 1, ')'); // replace the "comma" to finish 
a 'IN' clause string.
 
         if (addParens) {
           buf.append(")");
diff --git 
a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
 
b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
index 60be0f9c227..cd237b9caf2 100644
--- 
a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
+++ 
b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
@@ -61,6 +61,7 @@ public class TestTxnUtils {
     //          The first query happens to have 2 full batches.
     MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH, 1);
     MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE, 
10);
+    MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS, 2000);
     List<Long> inList = new ArrayList<>();
     for (long i = 1; i <= 189; i++) {
       inList.add(i);
@@ -148,12 +149,32 @@ public class TestTxnUtils {
     ret = TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, 
inList, "TXN_ID", false, false);
     Assert.assertEquals(3, queries.size());
     Assert.assertEquals(queries.size(), ret.size());
-    Assert.assertEquals(2255L, ret.get(0).longValue());
-    Assert.assertEquals(2033L, ret.get(1).longValue());
-    Assert.assertEquals(33L, ret.get(2).longValue());
+    Assert.assertEquals(2000L, ret.get(0).longValue());
+    Assert.assertEquals(2000L, ret.get(1).longValue());
+    Assert.assertEquals(321L, ret.get(2).longValue());
     runAgainstDerby(queries);
   }
 
+  @Test(expected = IllegalArgumentException.class)
+  public void testBuildQueryWithNOTINClauseFailure() throws Exception {
+    MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH, 10);
+    MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE, 
100);
+    MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS, 1000);
+    List<String> queries = new ArrayList<>();
+    List<Long> deleteSet = new ArrayList<>();
+    for (long i=0; i < 2000; i++) {
+      deleteSet.add(i+1);
+    }
+    StringBuilder prefix = new StringBuilder();
+    StringBuilder suffix = new StringBuilder();
+    prefix.append("select count(*) from TXNS where ");
+    List<String> questions = new ArrayList<>(deleteSet.size());
+    for (int  i = 0; i < deleteSet.size(); i++) {
+      questions.add("?");
+    }
+    TxnUtils.buildQueryWithINClauseStrings(conf, queries, prefix, suffix, 
questions, "cc_id", false, true);
+  }
+
   /** Verify queries can run against Derby DB.
    *  As long as Derby doesn't complain, we assume the query is 
syntactically/semantically correct.
    */

Reply via email to