KYLIN-2515 Fix sql convert issue if the sql has sub-query
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/70ad90c2 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/70ad90c2 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/70ad90c2 Branch: refs/heads/master Commit: 70ad90c2d5ee643b2febde1857a55c150eb3d9cf Parents: 81d2637 Author: nichunen <[email protected]> Authored: Fri Jun 16 21:05:40 2017 +0800 Committer: æ <[email protected]> Committed: Fri Jun 16 21:26:56 2017 +0800 ---------------------------------------------------------------------- .../source/adhocquery/HiveAdhocConverter.java | 84 +++++++++++++++++--- .../adhocquery/HiveAdhocConverterTest.java | 39 ++++++--- 2 files changed, 99 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/70ad90c2/core-metadata/src/main/java/org/apache/kylin/source/adhocquery/HiveAdhocConverter.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/adhocquery/HiveAdhocConverter.java b/core-metadata/src/main/java/org/apache/kylin/source/adhocquery/HiveAdhocConverter.java index 97d77bf..89b2f7a 100644 --- a/core-metadata/src/main/java/org/apache/kylin/source/adhocquery/HiveAdhocConverter.java +++ b/core-metadata/src/main/java/org/apache/kylin/source/adhocquery/HiveAdhocConverter.java @@ -26,16 +26,48 @@ import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.ImmutableSet; + //TODO: Some workaround ways to make sql readable by hive parser, should replaced it with a more well-designed way public class HiveAdhocConverter implements IAdHocConverter { @SuppressWarnings("unused") private static final Logger logger = LoggerFactory.getLogger(HiveAdhocConverter.class); - private static final Pattern EXTRACT_PATTERN = 
Pattern.compile("\\s+extract\\s*(\\()\\s*(.*?)\\s*from(\\s+)", Pattern.CASE_INSENSITIVE); - private static final Pattern FROM_PATTERN = Pattern.compile("\\s+from\\s+(\\()\\s*select\\s", Pattern.CASE_INSENSITIVE); - private static final Pattern CAST_PATTERN = Pattern.compile("CAST\\((.*?) (?i)AS\\s*(.*?)\\s*\\)", Pattern.CASE_INSENSITIVE); - private static final Pattern CONCAT_PATTERN = Pattern.compile("(['_a-z0-9A-Z]+)\\|\\|(['_a-z0-9A-Z]+)", Pattern.CASE_INSENSITIVE); + private static final Pattern EXTRACT_PATTERN = Pattern.compile("extract\\s*(\\()\\s*(.*?)\\s*from(\\s+)", + Pattern.CASE_INSENSITIVE); + private static final Pattern FROM_PATTERN = Pattern.compile("\\s+from\\s+(\\()\\s*select\\s", + Pattern.CASE_INSENSITIVE); + private static final Pattern ALIAS_PATTERN = Pattern.compile("\\s+([`'_a-z0-9A-Z]+)", Pattern.CASE_INSENSITIVE); + private static final Pattern CAST_PATTERN = Pattern.compile("CAST\\((.*?) (?i)AS\\s*(.*?)\\s*\\)", + Pattern.CASE_INSENSITIVE); + private static final Pattern CONCAT_PATTERN = Pattern.compile("(['_a-z0-9A-Z]+)\\|\\|(['_a-z0-9A-Z]+)", + Pattern.CASE_INSENSITIVE); + private static final Pattern TIMESTAMPADD_PATTERN = Pattern.compile("timestampadd\\s*\\(\\s*(.*?)\\s*,", + Pattern.CASE_INSENSITIVE); + private static final ImmutableSet<String> sqlKeyWordsExceptAS = ImmutableSet.of("ABSOLUTE", "ACTION", "ADD", "ALL", + "ALLOCATE", "ALTER", "AND", "ANY", "ARE", "ASC", "ASSERTION", "AT", "AUTHORIZATION", "AVG", "BEGIN", + "BETWEEN", "BIT", "BIT_LENGTH", "BOTH", "BY", "CASCADE", "CASCADED", "CASE", "CAST", "CATALOG", "CHAR", + "CHARACTER", "CHARACTER_LENGTH", "CHAR_LENGTH", "CHECK", "CLOSE", "COALESCE", "COLLATE", "COLLATION", + "COLUMN", "COMMIT", "CONNECT", "CONNECTION", "CONSTRAINT", "CONSTRAINTS", "CONTINUE", "CONVERT", + "CORRESPONDING", "COUNT", "CREATE", "CROSS", "CURRENT", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", + "CURRENT_USER", "CURSOR", "DATE", "DAY", "DEALLOCATE", "DEC", "DECIMAL", "DECLARE", "DEFAULT", 
"DEFERRABLE", + "DEFERRED", "DELETE", "DESC", "DESCRIBE", "DESCRIPTOR", "DIAGNOSTICS", "DISCONNECT", "DISTINCT", "DOMAIN", + "DOUBLE", "DROP", "ELSE", "END", "END-EXEC", "ESCAPE", "EXCEPT", "EXCEPTION", "EXEC", "EXECUTE", "EXISTS", + "EXTERNAL", "EXTRACT", "FALSE", "FETCH", "FIRST", "FLOAT", "FOR", "FOREIGN", "FOUND", "FROM", "FULL", "GET", + "GLOBAL", "GO", "GOTO", "GRANT", "GROUP", "HAVING", "HOUR", "IDENTITY", "IMMEDIATE", "IN", "INDICATOR", + "INITIALLY", "INNER", "INADD", "INSENSITIVE", "INSERT", "INT", "INTEGER", "INTERSECT", "INTERVAL", "INTO", + "IS", "ISOLATION", "JOIN", "KEY", "LANGUAGE", "LAST", "LEADING", "LEFT", "LEVEL", "LIKE", "LOCAL", "LOWER", + "MATCH", "MAX", "MIN", "MINUTE", "MODULE", "MONTH", "NAMES", "NATIONAL", "NATURAL", "NCHAR", "NEXT", "NO", + "NOT", "NULL", "NULLIF", "NUMERIC", "OCTET_LENGTH", "OF", "ON", "ONLY", "OPEN", "OPTION", "OR", "ORDER", + "OUTER", "OUTADD", "OVERLAPS", "PAD", "PARTIAL", "POSITION", "PRECISION", "PREPARE", "PRESERVE", "PRIMARY", + "PRIOR", "PRIVILEGES", "PROCEDURE", "PUBLIC", "READ", "REAL", "REFERENCES", "RELATIVE", "RESTRICT", + "REVOKE", "RIGHT", "ROLLBACK", "ROWS", "SCHEMA", "SCROLL", "SECOND", "SECTION", "SELECT", "SESSION", + "SESSION_USER", "SET", "SIZE", "SMALLINT", "SOME", "SPACE", "SQL", "SQLCODE", "SQLERROR", "SQLSTATE", + "SUBSTRING", "SUM", "SYSTEM_USER", "TABLE", "TEMPORARY", "THEN", "TIME", "TIMESTAMP", "TIMEZONE_HOUR", + "TIMEZONE_MINUTE", "TO", "TRAILING", "TRANSACTION", "TRANSLATE", "TRANSLATION", "TRIM", "TRUE", "UNION", + "UNIQUE", "UNKNOWN", "UPDATE", "UPPER", "USAGE", "USER", "USING", "VALUE", "VALUES", "VARCHAR", "VARYING", + "VIEW", "WHEN", "WHENEVER", "WHERE", "WITH", "WORK", "WRITE", "YEAR", "ZONE"); public static String replaceString(String originString, String fromString, String toString) { return originString.replace(fromString, toString); @@ -55,16 +87,17 @@ public class HiveAdhocConverter implements IAdHocConverter { int startIdx = extractMatcher.end(3); int endIdx = 
parenthesesPairs.get(extractMatcher.start(1)); String extractInner = originString.substring(startIdx, endIdx); - int originStart = extractMatcher.start(0) + 1; + int originStart = extractMatcher.start(0); int originEnd = endIdx + 1; - replacedString = replaceString(replacedString, originString.substring(originStart, originEnd), functionStr + "(" + extractInner + ")"); + replacedString = replaceString(replacedString, originString.substring(originStart, originEnd), + functionStr + "(" + extractInner + ")"); } return replacedString; } - public static String castRepalce(String originString) { + public static String castReplace(String originString) { Matcher castMatcher = CAST_PATTERN.matcher(originString); String replacedString = originString; @@ -95,7 +128,7 @@ public class HiveAdhocConverter implements IAdHocConverter { return replacedString; } - public static String subqueryRepalce(String originString) { + public static String subqueryReplace(String originString) { Matcher subqueryMatcher = FROM_PATTERN.matcher(originString); String replacedString = originString; Map<Integer, Integer> parenthesesPairs = null; @@ -108,7 +141,30 @@ public class HiveAdhocConverter implements IAdHocConverter { int startIdx = subqueryMatcher.start(1); int endIdx = parenthesesPairs.get(startIdx) + 1; - replacedString = replaceString(replacedString, originString.substring(startIdx, endIdx), originString.substring(startIdx, endIdx) + " as alias"); + Matcher aliasMatcher = ALIAS_PATTERN.matcher(originString.substring(endIdx)); + if (aliasMatcher.find()) { + String aliasCandidate = aliasMatcher.group(1); + + if (aliasCandidate != null && !sqlKeyWordsExceptAS.contains(aliasCandidate.toUpperCase())) { + continue; + } + + replacedString = replaceString(replacedString, originString.substring(startIdx, endIdx), + originString.substring(startIdx, endIdx) + " as alias"); + } + } + + return replacedString; + } + + public static String timestampaddReplace(String originString) { + Matcher 
timestampaddMatcher = TIMESTAMPADD_PATTERN.matcher(originString); + String replacedString = originString; + + while (timestampaddMatcher.find()) { + String interval = timestampaddMatcher.group(1); + String timestampaddStr = replaceString(timestampaddMatcher.group(), interval, "'" + interval + "'"); + replacedString = replaceString(replacedString, timestampaddMatcher.group(), timestampaddStr); } return replacedString; @@ -121,7 +177,8 @@ public class HiveAdhocConverter implements IAdHocConverter { while (concatMatcher.find()) { String leftString = concatMatcher.group(1); String rightString = concatMatcher.group(2); - replacedString = replaceString(replacedString, leftString + "||" + rightString, "concat(" + leftString + "," + rightString + ")"); + replacedString = replaceString(replacedString, leftString + "||" + rightString, + "concat(" + leftString + "," + rightString + ")"); } return replacedString; @@ -135,10 +192,10 @@ public class HiveAdhocConverter implements IAdHocConverter { convertedSql = extractReplace(convertedSql); // Step3.Replace cast type string - convertedSql = castRepalce(convertedSql); + convertedSql = castReplace(convertedSql); // Step4.Replace sub query - convertedSql = subqueryRepalce(convertedSql); + convertedSql = subqueryReplace(convertedSql); // Step5.Replace char_length with length convertedSql = replaceString(convertedSql, "char_length", "length"); @@ -146,6 +203,9 @@ public class HiveAdhocConverter implements IAdHocConverter { // Step6.Replace "||" with concat convertedSql = concatReplace(convertedSql); + // Step7.Add quote for interval in timestampadd + convertedSql = timestampaddReplace(convertedSql); + return convertedSql; } http://git-wip-us.apache.org/repos/asf/kylin/blob/70ad90c2/core-metadata/src/test/java/org/apache/kylin/source/adhocquery/HiveAdhocConverterTest.java ---------------------------------------------------------------------- diff --git 
a/core-metadata/src/test/java/org/apache/kylin/source/adhocquery/HiveAdhocConverterTest.java b/core-metadata/src/test/java/org/apache/kylin/source/adhocquery/HiveAdhocConverterTest.java index 85a6d61..cfb0f32 100644 --- a/core-metadata/src/test/java/org/apache/kylin/source/adhocquery/HiveAdhocConverterTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/source/adhocquery/HiveAdhocConverterTest.java @@ -22,42 +22,57 @@ import org.junit.Test; import junit.framework.TestCase; - public class HiveAdhocConverterTest extends TestCase { @Test - public void testSringReplace() { + public void testStringReplace() { String originString = "select count(*) as cnt from test_kylin_fact where char_length(lstg_format_name) < 10"; - String replacedString = HiveAdhocConverter - .replaceString(originString, "char_length", "length"); - assertEquals(replacedString, "select count(*) as cnt from test_kylin_fact where length(lstg_format_name) < 10"); + String replacedString = HiveAdhocConverter.replaceString(originString, "char_length", "length"); + assertEquals("select count(*) as cnt from test_kylin_fact where length(lstg_format_name) < 10", replacedString); } @Test public void testExtractReplace() { String originString = "ignore EXTRACT(YEAR FROM KYLIN_CAL_DT.CAL_DT) ignore"; String replacedString = HiveAdhocConverter.extractReplace(originString); - assertEquals(replacedString, "ignore YEAR(KYLIN_CAL_DT.CAL_DT) ignore"); + assertEquals("ignore YEAR(KYLIN_CAL_DT.CAL_DT) ignore", replacedString); } @Test public void testCastReplace() { String originString = "ignore EXTRACT(YEAR FROM CAST(KYLIN_CAL_DT.CAL_DT AS INTEGER)) ignore"; - String replacedString = HiveAdhocConverter.castRepalce(originString); - assertEquals(replacedString, "ignore EXTRACT(YEAR FROM CAST(KYLIN_CAL_DT.CAL_DT AS int)) ignore"); + String replacedString = HiveAdhocConverter.castReplace(originString); + assertEquals("ignore EXTRACT(YEAR FROM CAST(KYLIN_CAL_DT.CAL_DT AS int)) ignore", replacedString); } @Test - 
public void testSubqueryReplace() { + public void testSubqueryReplace1() { String originString = "select seller_id,lstg_format_name,sum(price) from (select * from test_kylin_fact where (lstg_format_name='FP-GTC') limit 20) group by seller_id,lstg_format_name"; - String replacedString = HiveAdhocConverter.subqueryRepalce(originString); - assertEquals(replacedString, "select seller_id,lstg_format_name,sum(price) from (select * from test_kylin_fact where (lstg_format_name='FP-GTC') limit 20) as alias group by seller_id,lstg_format_name"); + String replacedString = HiveAdhocConverter.subqueryReplace(originString); + assertEquals( + "select seller_id,lstg_format_name,sum(price) from (select * from test_kylin_fact where (lstg_format_name='FP-GTC') limit 20) as alias group by seller_id,lstg_format_name", + replacedString); + } + + @Test + public void testSubqueryReplace2() { + String originString = "select count(*) from ( select test_kylin_fact.lstg_format_name from test_kylin_fact where test_kylin_fact.lstg_format_name='FP-GTC' group by test_kylin_fact.lstg_format_name ) t "; + String replacedString = HiveAdhocConverter.subqueryReplace(originString); + assertEquals(originString, replacedString); + } + + @Test + public void testSubqueryReplace3() { + String originString = "select fact.lstg_format_name from (select * from test_kylin_fact where cal_dt > date'2010-01-01' ) as fact group by fact.lstg_format_name order by CASE WHEN fact.lstg_format_name IS NULL THEN 'sdf' ELSE fact.lstg_format_name END "; + String replacedString = HiveAdhocConverter.subqueryReplace(originString); + assertEquals(originString, replacedString); } @Test public void testConcatReplace() { String originString = "select count(*) as cnt from test_kylin_fact where lstg_format_name||'a'='ABINa'"; String replacedString = HiveAdhocConverter.concatReplace(originString); - assertEquals(replacedString, "select count(*) as cnt from test_kylin_fact where concat(lstg_format_name,'a')='ABINa'"); + 
assertEquals("select count(*) as cnt from test_kylin_fact where concat(lstg_format_name,'a')='ABINa'", + replacedString); } }
