This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/branch-3 by this push: new c90f2f7a805 HIVE-27542: Backport HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split based on max parameters allowed by SQL DB (#4525) c90f2f7a805 is described below commit c90f2f7a805cf056978a5f3cc88525d10910dce0 Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com> AuthorDate: Fri Aug 11 12:37:35 2023 +0530 HIVE-27542: Backport HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split based on max parameters allowed by SQL DB (#4525) Signed-off-by: Sankar Hariappan <sank...@apache.org> Closes (#4525) --- .../hadoop/hive/metastore/conf/MetastoreConf.java | 3 +++ .../apache/hadoop/hive/metastore/txn/TxnUtils.java | 5 ++-- .../hadoop/hive/metastore/txn/TestTxnUtils.java | 27 +++++++++++++++++++--- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 322edf10d93..aafd50ae466 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -457,6 +457,9 @@ public class MetastoreConf { DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause", "hive.direct.sql.max.elements.values.clause", 1000, "The maximum number of values in a VALUES clause for INSERT statement."), + DIRECT_SQL_MAX_PARAMETERS("metastore.direct.sql.max.parameters", + "hive.direct.sql.max.parameters", 1000, "The maximum query parameters \n" + + "backend sql engine can support."), DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length", "hive.direct.sql.max.query.length", 100, "The maximum\n" + " size of a query string (in KB)."), diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java index fa291d5f20a..61701625150 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java @@ -329,6 +329,7 @@ public class TxnUtils { // Get configuration parameters int maxQueryLength = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH); int batchSize = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE); + int maxParameters = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS); // Check parameter set validity as a public method. if (inList == null || inList.size() == 0 || maxQueryLength <= 0 || batchSize <= 0) { @@ -380,7 +381,7 @@ public class TxnUtils { // Compute the size of a query when the 'nextValue' is added to the current query. int querySize = querySizeExpected(buf.length(), nextValue.length(), suffix.length(), addParens); - if (querySize > maxQueryLength * 1024) { + if ((querySize > maxQueryLength * 1024) || (currentCount >= maxParameters)) { // Check an edge case where the DIRECT_SQL_MAX_QUERY_LENGTH does not allow one 'IN' clause with single value. if (cursor4queryOfInClauses == 1 && cursor4InClauseElements == 0) { throw new IllegalArgumentException("The current " + ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH.getVarname() + " is set too small to have one IN clause with single value!"); @@ -396,7 +397,7 @@ public class TxnUtils { buf.delete(buf.length()-newInclausePrefix.length(), buf.length()); } - buf.setCharAt(buf.length() - 1, ')'); // replace the "commar" to finish a 'IN' clause string. + buf.setCharAt(buf.length() - 1, ')'); // replace the "comma" to finish a 'IN' clause string. if (addParens) { buf.append(")"); diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java index 60be0f9c227..cd237b9caf2 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java @@ -61,6 +61,7 @@ public class TestTxnUtils { // The first query happens to have 2 full batches. MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH, 1); MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE, 10); + MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS, 2000); List<Long> inList = new ArrayList<>(); for (long i = 1; i <= 189; i++) { inList.add(i); @@ -148,12 +149,32 @@ public class TestTxnUtils { ret = TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, inList, "TXN_ID", false, false); Assert.assertEquals(3, queries.size()); Assert.assertEquals(queries.size(), ret.size()); - Assert.assertEquals(2255L, ret.get(0).longValue()); - Assert.assertEquals(2033L, ret.get(1).longValue()); - Assert.assertEquals(33L, ret.get(2).longValue()); + Assert.assertEquals(2000L, ret.get(0).longValue()); + Assert.assertEquals(2000L, ret.get(1).longValue()); + Assert.assertEquals(321L, ret.get(2).longValue()); runAgainstDerby(queries); } + @Test(expected = IllegalArgumentException.class) + public void testBuildQueryWithNOTINClauseFailure() throws Exception { + MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH, 10); + MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE, 100); + MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS, 1000); + List<String> queries = new ArrayList<>(); + List<Long> deleteSet = new ArrayList<>(); + for (long i=0; i < 2000; i++) { + deleteSet.add(i+1); + } + StringBuilder prefix = new StringBuilder(); + StringBuilder suffix = new StringBuilder(); + prefix.append("select count(*) from TXNS where "); + List<String> questions = new ArrayList<>(deleteSet.size()); + for (int i = 0; i < deleteSet.size(); i++) { + questions.add("?"); + } + TxnUtils.buildQueryWithINClauseStrings(conf, queries, prefix, suffix, questions, "cc_id", false, true); + } + /** Verify queries can run against Derby DB. * As long as Derby doesn't complain, we assume the query is syntactically/semantically correct. */