This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new aa7a903 HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split based on max parameters allowed by SQL DB (Nikhil Gupta, reviewed by Adesh Rao, Sankar Hariappan) aa7a903 is described below commit aa7a9030ee4d457dd6da45db63a12ce7d972362a Author: guptanikhil007 <gupta.nikhil0...@gmail.com> AuthorDate: Mon Nov 8 11:21:35 2021 +0530 HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split based on max parameters allowed by SQL DB (Nikhil Gupta, reviewed by Adesh Rao, Sankar Hariappan) Signed-off-by: Sankar Hariappan <sank...@apache.org> Closes (#2758) --- .../hadoop/hive/metastore/conf/MetastoreConf.java | 3 +++ .../apache/hadoop/hive/metastore/txn/TxnUtils.java | 6 ++--- .../hadoop/hive/metastore/txn/TestTxnUtils.java | 29 +++++++++++++++++++--- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 0e05ad3..21ea1f8 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -680,6 +680,9 @@ public class MetastoreConf { DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause", "hive.direct.sql.max.elements.values.clause", 1000, "The maximum number of values in a VALUES clause for INSERT statement."), + DIRECT_SQL_MAX_PARAMETERS("metastore.direct.sql.max.parameters", + "hive.direct.sql.max.parameters", 1000, "The maximum query parameters \n" + + "backend sql engine can support."), DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length", "hive.direct.sql.max.query.length", 100, "The maximum\n" + " size of a query string (in KB)."), diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java index f2c881a..13d45d1 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java @@ -265,6 +265,7 @@ public class TxnUtils { // Get configuration parameters int maxQueryLength = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH); int batchSize = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE); + int maxParameters = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS); // Check parameter set validity as a public method. if (inList == null || inList.size() == 0 || maxQueryLength <= 0 || batchSize <= 0) { @@ -316,7 +317,7 @@ public class TxnUtils { // Compute the size of a query when the 'nextValue' is added to the current query. int querySize = querySizeExpected(buf.length(), nextValue.length(), suffix.length(), addParens); - if (querySize > maxQueryLength * 1024) { + if ((querySize > maxQueryLength * 1024) || (currentCount >= maxParameters)) { // Check an edge case where the DIRECT_SQL_MAX_QUERY_LENGTH does not allow one 'IN' clause with single value. if (cursor4queryOfInClauses == 1 && cursor4InClauseElements == 0) { throw new IllegalArgumentException("The current " + ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH.getVarname() + " is set too small to have one IN clause with single value!"); @@ -351,9 +352,8 @@ public class TxnUtils { continue; } else if (cursor4InClauseElements >= batchSize-1 && cursor4InClauseElements != 0) { // Finish the current 'IN'/'NOT IN' clause and start a new clause. - buf.setCharAt(buf.length() - 1, ')'); // replace the "commar". + buf.setCharAt(buf.length() - 1, ')'); // replace the "comma". buf.append(newInclausePrefix.toString()); - newInclausePrefixJustAppended = true; // increment cursor for per-query IN-clause list diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java index 811a6ac..42f1ca4 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java @@ -63,6 +63,7 @@ public class TestTxnUtils { // The first query happens to have 2 full batches. MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH, 1); MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE, 10); + MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS, 2000); List<Long> inList = new ArrayList<>(); for (long i = 1; i <= 189; i++) { inList.add(i); @@ -150,12 +151,34 @@ public class TestTxnUtils { ret = TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, inList, "TXN_ID", false, false); Assert.assertEquals(3, queries.size()); Assert.assertEquals(queries.size(), ret.size()); - Assert.assertEquals(2255L, ret.get(0).longValue()); - Assert.assertEquals(2033L, ret.get(1).longValue()); - Assert.assertEquals(33L, ret.get(2).longValue()); + Assert.assertEquals(2000L, ret.get(0).longValue()); + Assert.assertEquals(2000L, ret.get(1).longValue()); + Assert.assertEquals(321L, ret.get(2).longValue()); runAgainstDerby(queries); } + @Test(expected = IllegalArgumentException.class) + public void testBuildQueryWithNOTINClauseFailure() throws Exception { + MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH, 10); + MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE, 100); + MetastoreConf.setLongVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS, 1000); + List<String> queries = new ArrayList<>(); + List<Long> deleteSet = new ArrayList<>(); + for (long i=0; i < 2000; i++) { + deleteSet.add(i+1); + } + StringBuilder prefix = new StringBuilder(); + StringBuilder suffix = new StringBuilder(); + + prefix.append("select count(*) from TXNS where "); + + List<String> questions = new ArrayList<>(deleteSet.size()); + for (int i = 0; i < deleteSet.size(); i++) { + questions.add("?"); + } + TxnUtils.buildQueryWithINClauseStrings(conf, queries, prefix, suffix, questions, "cc_id", false, true); + } + /** Verify queries can run against Derby DB. * As long as Derby doesn't complain, we assume the query is syntactically/semantically correct. */