This is an automated email from the ASF dual-hosted git repository. omalley pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/hive.git
commit 1b803f8050b0dc7b1771bdb7ef22556e6df71cef Author: Zoltan Haindrich <k...@rxd.hu> AuthorDate: Wed Aug 22 18:20:41 2018 +0200 HIVE-18624: Parsing time is extremely high (~10 min) for queries with complex select expressions (Zoltan Haindrich reviewed by Ashutosh Chauhan) Signed-off-by: Zoltan Haindrich <k...@rxd.hu> (cherry picked from commit 4408661c0501bf1e7991e144f65b49732f4c641b) (cherry picked from commit a4b913360d6086b5da8d1c84a2d3cfd847131056) --- .../hadoop/hive/ql/parse/IdentifiersParser.g | 2 +- .../hadoop/hive/ql/parse/TestParseDriver.java | 100 +++++++++++++++++++++ .../hive/ql/parse/TestParseDriverIntervals.java | 3 +- .../clientnegative/char_pad_convert_fail2.q.out | 2 +- .../ptf_negative_DistributeByOrderBy.q.out | 3 +- .../ptf_negative_PartitionBySortBy.q.out | 3 +- .../clientnegative/ptf_window_boundaries.q.out | 2 +- .../clientnegative/ptf_window_boundaries2.q.out | 2 +- 8 files changed, 110 insertions(+), 7 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 8c4ee8a..071676a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -420,7 +420,7 @@ atomExpression | whenExpression | (subQueryExpression)=> (subQueryExpression) -> ^(TOK_SUBQUERY_EXPR TOK_SUBQUERY_OP subQueryExpression) - | (function) => function + | (functionName LPAREN) => function | tableOrColumn | expressionsInParenthesis[true] ; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java index cd9db19..827921d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java @@ -19,13 +19,22 @@ package org.apache.hadoop.hive.ql.parse; import static org.junit.Assert.assertEquals; +import org.junit.FixMethodOrder; import org.junit.Test; +import org.junit.runners.MethodSorters; +@FixMethodOrder(MethodSorters.NAME_ASCENDING) public class TestParseDriver { ParseDriver parseDriver = new ParseDriver(); @Test + public void atFirstWarmup() throws Exception { + // this test method is here to do an initial call to parsedriver; and prevent any tests with timeouts to be the first. + parseDriver.parse("select 1"); + } + + @Test public void testParse() throws Exception { String selectStr = "select field1, field2, sum(field3+field4)"; String whereStr = "field5=1 and field6 in ('a', 'b')"; @@ -114,4 +123,95 @@ public class TestParseDriver { assertTree((ASTNode) astNode1.getChild(i), (ASTNode) astNode2.getChild(i)); } } + + @Test(timeout = 1000) + public void testNestedFunctionCalls() throws Exception { + // Expectation here is not to run into a timeout + parseDriver.parse( + "select greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1," + + "greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1," + + "greatest(1,greatest(1,(greatest(1,greatest(1,2)))))))))))))))))))"); + } + + @Test(timeout = 1000) + public void testHIVE18624() throws Exception { + // Expectation here is not to run into a timeout + parseDriver.parse("EXPLAIN\n" + + "SELECT DISTINCT\n" + + "\n" + + "\n" + + " IF(lower('a') <= lower('a')\n" + + " ,'a'\n" + + " ,IF(('a' IS NULL AND from_unixtime(UNIX_TIMESTAMP()) <= 'a')\n" + + " ,'a'\n" + + " ,IF(if('a' = 'a', TRUE, FALSE) = 1\n" + + " ,'a'\n" + + " ,IF(('a' = 1 and lower('a') NOT IN ('a', 'a')\n" + + " and lower(if('a' = 'a','a','a')) <= lower('a'))\n" + + " OR ('a' like 'a' OR 'a' like 'a')\n" + + " OR 'a' in ('a','a')\n" + + " ,'a'\n" + + " ,IF(if(lower('a') in ('a', 'a') and 'a'='a', TRUE, FALSE) = 1\n" + + " ,'a'\n" + + " ,IF('a'='a' and unix_timestamp(if('a' = 'a',cast('a' as string),coalesce('a',cast('a' as string),from_unixtime(unix_timestamp())))) <= unix_timestamp(concat_ws('a',cast(lower('a') as string),'00:00:00')) + 9*3600\n" + + + " ,'a'\n" + + "\n" + + " ,If(lower('a') <= lower('a')\n" + + " and if(lower('a') in ('a', 'a') and 'a'<>'a', TRUE, FALSE) <> 1\n" + + " ,'a'\n" + + " ,IF('a'=1 AND 'a'=1\n" + + " ,'a'\n" + + " ,IF('a' = 1 and COALESCE(cast('a' as int),0) = 0\n" + + " ,'a'\n" + + " ,IF('a' = 'a'\n" + + " ,'a'\n" + + "\n" + + " ,If('a' = 'a' AND lower('a')>lower(if(lower('a')<1830,'a',cast(date_add('a',1) as timestamp)))\n" + + " ,'a'\n" + + "\n" + + "\n" + + "\n" + + " ,IF('a' = 1\n" + + "\n" + + " ,IF('a' in ('a', 'a') and ((unix_timestamp('a')-unix_timestamp('a')) / 60) > 30 and 'a' = 1\n" + + "\n" + + "\n" + + " ,'a', 'a')\n" + + "\n" + + "\n" + + " ,IF(if('a' = 'a', FALSE, TRUE ) = 1 AND 'a' IS NULL\n" + + " ,'a'\n" + + " ,IF('a' = 1 and 'a'>0\n" + + " , 'a'\n" + + "\n" + + " ,IF('a' = 1 AND 'a' ='a'\n" + + " ,'a'\n" + + " ,IF('a' is not null and 'a' is not null and 'a' > 'a'\n" + + " ,'a'\n" + + " ,IF('a' = 1\n" + + " ,'a'\n" + + "\n" + + " ,IF('a' = 'a'\n" + + " ,'a'\n" + + "\n" + + " ,If('a' = 1\n" + + " ,'a'\n" + + " ,IF('a' = 1\n" + + " ,'a'\n" + + " ,IF('a' = 1\n" + + " ,'a'\n" + + "\n" + + " ,IF('a' ='a' and 'a' ='a' and cast(unix_timestamp('a') as int) + 93600 < cast(unix_timestamp() as int)\n" + + " ,'a'\n" + + " ,IF('a' = 'a'\n" + + " ,'a'\n" + + " ,IF('a' = 'a' and 'a' in ('a','a','a')\n" + + " ,'a'\n" + + " ,IF('a' = 'a'\n" + + " ,'a','a'))\n" + + " )))))))))))))))))))))))\n" + + "AS test_comp_exp"); + } + } \ No newline at end of file diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java index 98ad12a..dd25f51 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java @@ -74,8 +74,9 @@ public class TestParseDriverIntervals { if (children != null) { for (Node c : children) { ASTNode r = findFunctionNode((ASTNode) c); - if (r != null) + if (r != null) { return r; + } } } return null; diff --git a/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out b/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out index 2780b7c..90f9356 100644 --- a/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out +++ b/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out @@ -40,4 +40,4 @@ POSTHOOK: query: load data local inpath '../../data/files/over1k' into table ove POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@over1k -FAILED: ParseException line 7:11 cannot recognize input near 'lpad' '(' '{' in expression specification +FAILED: ParseException line 7:12 cannot recognize input near '{' '"key1"' ':' in function specification diff --git a/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out b/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out index e8e8580..cb7f154 100644 --- a/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out +++ b/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out @@ -1 +1,2 @@ -FAILED: ParseException line 3:3 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification +FAILED: ParseException line 3:46 missing ) at 'order' near 'p_mfgr' +line 3:61 missing EOF at ')' near 'p_mfgr' diff --git a/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out b/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out index e8e8580..5daf86c 100644 --- a/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out +++ b/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out @@ -1 +1,2 @@ -FAILED: ParseException line 3:3 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification +FAILED: ParseException line 3:45 missing ) at 'sort' near 'p_mfgr' +line 3:59 missing EOF at ')' near 'p_mfgr' diff --git a/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out b/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out index c9c1c6d..c76feee 100644 --- a/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out +++ b/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out @@ -1 +1 @@ -FAILED: ParseException line 2:7 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification +FAILED: ParseException line 2:44 mismatched input 'following' expecting KW_PRECEDING near 'unbounded' in windowframestartboundary diff --git a/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out b/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out index c9c1c6d..9ed8be5 100644 --- a/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out +++ b/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out @@ -1 +1 @@ -FAILED: ParseException line 2:7 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification +FAILED: ParseException line 2:45 mismatched input 'following' expecting KW_PRECEDING near 'unbounded' in windowframestartboundary