DRILL-5697: Improve performance of filter operator for pattern matching closes #907
Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/aaff1b35 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/aaff1b35 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/aaff1b35 Branch: refs/heads/master Commit: aaff1b35b7339fb4e6ab480dd517994ff9f0a5c5 Parents: c7c8ffd Author: Padma Penumarthy <ppenuma...@yahoo.com> Authored: Mon Aug 7 15:11:00 2017 -0700 Committer: Paul Rogers <prog...@maprtech.com> Committed: Tue Sep 12 16:02:20 2017 -0700 ---------------------------------------------------------------------- .../drill/exec/expr/fn/impl/RegexpUtil.java | 77 +- .../expr/fn/impl/SqlPatternComplexMatcher.java | 36 + .../expr/fn/impl/SqlPatternConstantMatcher.java | 52 + .../expr/fn/impl/SqlPatternContainsMatcher.java | 53 + .../expr/fn/impl/SqlPatternEndsWithMatcher.java | 49 + .../exec/expr/fn/impl/SqlPatternFactory.java | 42 + .../exec/expr/fn/impl/SqlPatternMatcher.java | 22 + .../fn/impl/SqlPatternStartsWithMatcher.java | 48 + .../exec/expr/fn/impl/StringFunctions.java | 34 +- .../exec/store/ischema/InfoSchemaFilter.java | 4 +- .../exec/expr/fn/impl/TestSqlPatterns.java | 674 +++++++++++++ .../exec/expr/fn/impl/TestStringFunctions.java | 961 +++++++++++++++++++ 12 files changed, 2028 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java index 38c8316..aed718c 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java @@ -47,10 +47,49 @@ public class RegexpUtil { "[:alnum:]", "\\p{Alnum}" }; + // type of pattern string. + public enum SqlPatternType { + STARTS_WITH, // Starts with a constant string followed by any string values (ABC%) + ENDS_WITH, // Ends with a constant string, starts with any string values (%ABC) + CONTAINS, // Contains a constant string, starts and ends with any string values (%ABC%) + CONSTANT, // Is a constant string (ABC) + COMPLEX // Not a simple pattern. Needs regex evaluation. + }; + + public static class SqlPatternInfo { + private final SqlPatternType patternType; // type of pattern + + // simple pattern with like meta characters(% and _) and escape characters removed. + // Used for simple pattern matching. + private final String simplePatternString; + + // javaPatternString used for regex pattern match. + private final String javaPatternString; + + public SqlPatternInfo(final SqlPatternType patternType, final String javaPatternString, final String simplePatternString) { + this.patternType = patternType; + this.simplePatternString = simplePatternString; + this.javaPatternString = javaPatternString; + } + + public SqlPatternType getPatternType() { + return patternType; + } + + public String getSimplePatternString() { + return simplePatternString; + } + + public String getJavaPatternString() { + return javaPatternString; + } + + } + /** * Translates a SQL LIKE pattern to Java regex pattern. No escape char. */ - public static String sqlToRegexLike(String sqlPattern) { + public static SqlPatternInfo sqlToRegexLike(String sqlPattern) { return sqlToRegexLike(sqlPattern, (char)0); } @@ -58,7 +97,7 @@ public class RegexpUtil { * Translates a SQL LIKE pattern to Java regex pattern, with optional * escape string. */ - public static String sqlToRegexLike( + public static SqlPatternInfo sqlToRegexLike( String sqlPattern, CharSequence escapeStr) { final char escapeChar; @@ -76,12 +115,22 @@ public class RegexpUtil { /** * Translates a SQL LIKE pattern to Java regex pattern. */ - public static String sqlToRegexLike( + public static SqlPatternInfo sqlToRegexLike( String sqlPattern, char escapeChar) { int i; final int len = sqlPattern.length(); final StringBuilder javaPattern = new StringBuilder(len + len); + final StringBuilder simplePattern = new StringBuilder(len); + + // Figure out the pattern type and build simplePatternString + // as we are going through the sql pattern string + // to build java regex pattern string. This is better instead of using + // regex later for determining if a pattern is simple or not. + // Saves CPU cycles. + // Start with patternType as CONSTANT + SqlPatternType patternType = SqlPatternType.CONSTANT; + for (i = 0; i < len; i++) { char c = sqlPattern.charAt(i); if (JAVA_REGEX_SPECIALS.indexOf(c) >= 0) { @@ -96,20 +145,40 @@ public class RegexpUtil { || (nextChar == '%') || (nextChar == escapeChar)) { javaPattern.append(nextChar); + simplePattern.append(nextChar); i++; } else { throw invalidEscapeSequence(sqlPattern, i); } } else if (c == '_') { + // if we find _, it is not simple pattern, we are looking for only % + patternType = SqlPatternType.COMPLEX; javaPattern.append('.'); } else if (c == '%') { + if (i == 0) { + // % at the start could potentially be one of the simple cases i.e. ENDS_WITH. + patternType = SqlPatternType.ENDS_WITH; + } else if (i == (len-1)) { + if (patternType == SqlPatternType.ENDS_WITH) { + // Starts and Ends with %. This is contains. + patternType = SqlPatternType.CONTAINS; + } else if (patternType == SqlPatternType.CONSTANT) { + // % at the end with constant string in the beginning i.e. STARTS_WITH + patternType = SqlPatternType.STARTS_WITH; + } + } else { + // If we find % anywhere other than start or end, it is not a simple case. + patternType = SqlPatternType.COMPLEX; + } javaPattern.append("."); javaPattern.append('*'); } else { javaPattern.append(c); + simplePattern.append(c); } } - return javaPattern.toString(); + + return new SqlPatternInfo(patternType, javaPattern.toString(), simplePattern.toString()); } private static RuntimeException invalidEscapeCharacter(String s) { http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternComplexMatcher.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternComplexMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternComplexMatcher.java new file mode 100644 index 0000000..91cc85d --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternComplexMatcher.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +public class SqlPatternComplexMatcher implements SqlPatternMatcher { + java.util.regex.Matcher matcher; + CharSequence charSequenceWrapper; + + public SqlPatternComplexMatcher(String patternString, CharSequence charSequenceWrapper) { + this.charSequenceWrapper = charSequenceWrapper; + matcher = java.util.regex.Pattern.compile(patternString).matcher(""); + matcher.reset(charSequenceWrapper); + } + + @Override + public int match() { + matcher.reset(); + return matcher.matches() ? 1 : 0; + } + +} http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternConstantMatcher.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternConstantMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternConstantMatcher.java new file mode 100644 index 0000000..3294575 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternConstantMatcher.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +public class SqlPatternConstantMatcher implements SqlPatternMatcher { + final String patternString; + CharSequence charSequenceWrapper; + final int patternLength; + + public SqlPatternConstantMatcher(String patternString, CharSequence charSequenceWrapper) { + this.patternString = patternString; + this.charSequenceWrapper = charSequenceWrapper; + patternLength = patternString.length(); + } + + @Override + public int match() { + int index = 0; + + // If the lengths are not same, there cannot be a match + if (patternLength != charSequenceWrapper.length()) { + return 0; + } + + // simplePattern string has meta characters i.e % and _ and escape characters removed. + // so, we can just directly compare. + while (index < patternLength) { + if (patternString.charAt(index) != charSequenceWrapper.charAt(index)) { + break; + } + index++; + } + + return index == patternLength ? 1 : 0; + } + +} http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java new file mode 100644 index 0000000..2602dc8 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +public class SqlPatternContainsMatcher implements SqlPatternMatcher { + final String patternString; + CharSequence charSequenceWrapper; + final int patternLength; + + public SqlPatternContainsMatcher(String patternString, CharSequence charSequenceWrapper) { + this.patternString = patternString; + this.charSequenceWrapper = charSequenceWrapper; + patternLength = patternString.length(); + } + + @Override + public int match() { + final int txtLength = charSequenceWrapper.length(); + int patternIndex = 0; + int txtIndex = 0; + + // simplePattern string has meta characters i.e % and _ and escape characters removed. + // so, we can just directly compare. + while (patternIndex < patternLength && txtIndex < txtLength) { + if (patternString.charAt(patternIndex) != charSequenceWrapper.charAt(txtIndex)) { + // Go back if there is no match + txtIndex = txtIndex - patternIndex; + patternIndex = 0; + } else { + patternIndex++; + } + txtIndex++; + } + + return patternIndex == patternLength ? 1 : 0; + } + +} http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java new file mode 100644 index 0000000..15fed22 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +public class SqlPatternEndsWithMatcher implements SqlPatternMatcher { + final String patternString; + CharSequence charSequenceWrapper; + final int patternLength; + + public SqlPatternEndsWithMatcher(String patternString, CharSequence charSequenceWrapper) { + this.charSequenceWrapper = charSequenceWrapper; + this.patternString = patternString; + this.patternLength = patternString.length(); + } + + @Override + public int match() { + int txtIndex = charSequenceWrapper.length(); + int patternIndex = patternLength; + boolean matchFound = true; // if pattern is empty string, we always match. + + // simplePattern string has meta characters i.e % and _ and escape characters removed. + // so, we can just directly compare. + while (patternIndex > 0 && txtIndex > 0) { + if (charSequenceWrapper.charAt(--txtIndex) != patternString.charAt(--patternIndex)) { + matchFound = false; + break; + } + } + + return (patternIndex == 0 && matchFound == true) ? 1 : 0; + } + +} http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternFactory.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternFactory.java new file mode 100644 index 0000000..9c85a01 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternFactory.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.expr.fn.impl; + +public class SqlPatternFactory { + public static SqlPatternMatcher getSqlPatternMatcher(org.apache.drill.exec.expr.fn.impl.RegexpUtil.SqlPatternInfo patternInfo, + CharSequence charSequenceWrapper) + { + switch (patternInfo.getPatternType()) { + case COMPLEX: + return new SqlPatternComplexMatcher(patternInfo.getJavaPatternString(), charSequenceWrapper); + case STARTS_WITH: + return new SqlPatternStartsWithMatcher(patternInfo.getSimplePatternString(), charSequenceWrapper); + case CONSTANT: + return new SqlPatternConstantMatcher(patternInfo.getSimplePatternString(), charSequenceWrapper); + case ENDS_WITH: + return new SqlPatternEndsWithMatcher(patternInfo.getSimplePatternString(), charSequenceWrapper); + case CONTAINS: + return new SqlPatternContainsMatcher(patternInfo.getSimplePatternString(), charSequenceWrapper); + default: + break; + } + + return null; + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternMatcher.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternMatcher.java new file mode 100644 index 0000000..9c0c6e2 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternMatcher.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +public interface SqlPatternMatcher { + public int match(); +} http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternStartsWithMatcher.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternStartsWithMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternStartsWithMatcher.java new file mode 100644 index 0000000..9faae8a --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternStartsWithMatcher.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +public class SqlPatternStartsWithMatcher implements SqlPatternMatcher { + final String patternString; + CharSequence charSequenceWrapper; + final int patternLength; + + public SqlPatternStartsWithMatcher(String patternString, CharSequence charSequenceWrapper) { + this.charSequenceWrapper = charSequenceWrapper; + this.patternString = patternString; + patternLength = patternString.length(); + } + + @Override + public int match() { + int index = 0; + final int txtLength = charSequenceWrapper.length(); + + // simplePattern string has meta characters i.e % and _ and escape characters removed. + // so, we can just directly compare. + while (index < patternLength && index < txtLength) { + if (patternString.charAt(index) != charSequenceWrapper.charAt(index)) { + break; + } + index++; + } + + return (index == patternLength ? 1 : 0); + } + +} http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java index e5fe957..2a99ffa 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java @@ -55,24 +55,23 @@ public class StringFunctions{ @Param VarCharHolder input; @Param(constant=true) VarCharHolder pattern; @Output BitHolder out; - @Workspace java.util.regex.Matcher matcher; @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; + @Workspace org.apache.drill.exec.expr.fn.impl.RegexpUtil.SqlPatternInfo sqlPatternInfo; + @Workspace org.apache.drill.exec.expr.fn.impl.SqlPatternMatcher sqlPatternMatcher; @Override public void setup() { - matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( // - org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher(""); + sqlPatternInfo = org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( + org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)); charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); - matcher.reset(charSequenceWrapper); + sqlPatternMatcher = org.apache.drill.exec.expr.fn.impl.SqlPatternFactory.getSqlPatternMatcher(sqlPatternInfo, charSequenceWrapper); } @Override public void eval() { - charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); // Reusing same charSequenceWrapper, no need to pass it in. - // This saves one method call since reset(CharSequence) calls reset() - matcher.reset(); - out.value = matcher.matches()? 1:0; + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + out.value = sqlPatternMatcher.match(); } } @@ -83,25 +82,24 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder pattern; @Param(constant=true) VarCharHolder escape; @Output BitHolder out; - @Workspace java.util.regex.Matcher matcher; @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; + @Workspace org.apache.drill.exec.expr.fn.impl.RegexpUtil.SqlPatternInfo sqlPatternInfo; + @Workspace org.apache.drill.exec.expr.fn.impl.SqlPatternMatcher sqlPatternMatcher; @Override public void setup() { - matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( // + sqlPatternInfo = org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer), - org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer))).matcher(""); + org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer)); charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); - matcher.reset(charSequenceWrapper); + sqlPatternMatcher = org.apache.drill.exec.expr.fn.impl.SqlPatternFactory.getSqlPatternMatcher(sqlPatternInfo, charSequenceWrapper); } @Override public void eval() { - charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); // Reusing same charSequenceWrapper, no need to pass it in. - // This saves one method call since reset(CharSequence) calls reset() - matcher.reset(); - out.value = matcher.matches()? 1:0; + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + out.value = sqlPatternMatcher.match(); } } @@ -117,7 +115,7 @@ public class StringFunctions{ @Override public void setup() { matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( // - org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)), + org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).getJavaPatternString(), java.util.regex.Pattern.CASE_INSENSITIVE).matcher(""); charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); matcher.reset(charSequenceWrapper); @@ -147,7 +145,7 @@ public class StringFunctions{ public void setup() { matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( // org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer), - org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer)), + org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer)).getJavaPatternString(), java.util.regex.Pattern.CASE_INSENSITIVE).matcher(""); charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); matcher.reset(charSequenceWrapper); http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java index 22ad7f6..6d4d8ce 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java @@ -152,10 +152,10 @@ public class InfoSchemaFilter { final String fieldValue = recordValues.get(col.field.toString()); if (fieldValue != null) { if (escape == null) { - return Pattern.matches(sqlToRegexLike(pattern.value), fieldValue) ? + return Pattern.matches(sqlToRegexLike(pattern.value).getJavaPatternString(), fieldValue) ? Result.TRUE : Result.FALSE; } else { - return Pattern.matches(sqlToRegexLike(pattern.value, escape.value), fieldValue) ? + return Pattern.matches(sqlToRegexLike(pattern.value, escape.value).getJavaPatternString(), fieldValue) ? Result.TRUE : Result.FALSE; } } http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestSqlPatterns.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestSqlPatterns.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestSqlPatterns.java new file mode 100644 index 0000000..d8c1410 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestSqlPatterns.java @@ -0,0 +1,674 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.expr.fn.impl; +import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestSqlPatterns { + + @Test + public void testSqlRegexLike() { + // Given SQL like pattern, verify patternType is correct. + // Java pattern should have % replaced with .*, _ replaced with . + // Simple pattern should have meta (% and _) and escape characters removed. + + // A%B is complex + RegexpUtil.SqlPatternInfo patternInfo = RegexpUtil.sqlToRegexLike("A%B"); + assertEquals("A.*B", patternInfo.getJavaPatternString()); + assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType()); + + // A_B is complex + patternInfo = RegexpUtil.sqlToRegexLike("A_B"); + assertEquals("A.B", patternInfo.getJavaPatternString()); + assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType()); + + // A%B%D is complex + patternInfo = RegexpUtil.sqlToRegexLike("A%B%D"); + assertEquals("A.*B.*D", patternInfo.getJavaPatternString()); + assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType()); + + // %AB% is contains + patternInfo = RegexpUtil.sqlToRegexLike("%AB%"); + assertEquals(".*AB.*", patternInfo.getJavaPatternString()); + assertEquals("AB", patternInfo.getSimplePatternString()); + assertEquals(RegexpUtil.SqlPatternType.CONTAINS, patternInfo.getPatternType()); + + // %AB is ends with + patternInfo = RegexpUtil.sqlToRegexLike("%AB"); + assertEquals(".*AB", patternInfo.getJavaPatternString()); + assertEquals("AB", patternInfo.getSimplePatternString()); + assertEquals(RegexpUtil.SqlPatternType.ENDS_WITH, patternInfo.getPatternType()); + + // AB% is starts with + patternInfo = RegexpUtil.sqlToRegexLike("AB%"); + assertEquals("AB.*", patternInfo.getJavaPatternString()); + assertEquals("AB", patternInfo.getSimplePatternString()); + assertEquals(RegexpUtil.SqlPatternType.STARTS_WITH, patternInfo.getPatternType()); + + // AB is constant. + patternInfo = RegexpUtil.sqlToRegexLike("AB"); + assertEquals("AB", patternInfo.getJavaPatternString()); + assertEquals("AB", patternInfo.getSimplePatternString()); + assertEquals(RegexpUtil.SqlPatternType.CONSTANT, patternInfo.getPatternType()); + + // Test with escape characters. + + // A%#B is invalid escape sequence + try { + patternInfo = RegexpUtil.sqlToRegexLike("A%#B", '#'); + } catch (Exception ex) { + assertTrue(ex.getMessage().contains("Invalid escape sequence")); + } + + // A#%B with # as escape character is constant A%B + patternInfo = RegexpUtil.sqlToRegexLike("A#%B", '#'); + assertEquals("A%B", patternInfo.getJavaPatternString()); + assertEquals("A%B", patternInfo.getSimplePatternString()); + assertEquals(RegexpUtil.SqlPatternType.CONSTANT, patternInfo.getPatternType()); + + // %A#%B% is contains A%B + patternInfo = RegexpUtil.sqlToRegexLike("%A#%B%", '#'); + assertEquals(".*A%B.*", patternInfo.getJavaPatternString()); + assertEquals("A%B", patternInfo.getSimplePatternString()); + assertEquals(RegexpUtil.SqlPatternType.CONTAINS, patternInfo.getPatternType()); + + // #%AB% is starts with %AB + patternInfo = RegexpUtil.sqlToRegexLike("#%AB%", '#'); + assertEquals("%AB.*", patternInfo.getJavaPatternString()); + assertEquals("%AB", patternInfo.getSimplePatternString()); + assertEquals(RegexpUtil.SqlPatternType.STARTS_WITH, patternInfo.getPatternType()); + + // %#%AB#% is ends with %AB% + patternInfo = RegexpUtil.sqlToRegexLike("%#%AB#%", '#'); + assertEquals(".*%AB%", patternInfo.getJavaPatternString()); + assertEquals("%AB%", patternInfo.getSimplePatternString()); + assertEquals(RegexpUtil.SqlPatternType.ENDS_WITH, patternInfo.getPatternType()); + + // #_A#%B%C is complex + patternInfo = RegexpUtil.sqlToRegexLike("#_A#%B%C", '#'); + assertEquals("_A%B.*C", patternInfo.getJavaPatternString()); + assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType()); + + } + + @Test + public void testSqlPatternStartsWith() { + RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"", "ABC"); + + StringBuffer sb = new StringBuffer("ABCD"); + SqlPatternMatcher sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternStartsWith.match(), 1); // ABCD should match StartsWith ABC + + sb.setLength(0); + sb.append("BCD"); + assertEquals(sqlPatternStartsWith.match(), 0); // BCD should not match StartsWith ABC + + sb.setLength(0); + sb.append("XYZABC"); + assertEquals(sqlPatternStartsWith.match(), 0); // XYZABC should not match StartsWith ABC + + // null text + sb.setLength(0); + assertEquals(sqlPatternStartsWith.match(), 0); // null String should not match StartsWith ABC + + // pattern length > txt length + sb.append("AB"); + assertEquals(sqlPatternStartsWith.match(), 0); // AB should not match StartsWith ABC + + // startsWith null pattern should match anything + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"", ""); + sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternStartsWith.match(), 1); // AB should match StartsWith null pattern + + // null pattern and null text + sb.setLength(0); + assertEquals(sqlPatternStartsWith.match(), 1); // null text should match null pattern + + // wide character string. + sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" + + "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" + + "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" + + "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" + + " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" + + "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" + + "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" + + "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" + + "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" + + "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" + + "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" + + "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" + + "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" + + "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" + + "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" + + "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" + + "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" + + "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" + + "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" + + "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" + + "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" + + "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" + + "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" + + "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" + + "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" + + "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" + + "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" + + "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" + + "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" + + "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" + + "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" + + "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" + + "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" + + "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" + + "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" + + "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" + + "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" + + "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" + + "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" + + "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" + + "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" + + "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" + + "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" + + "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" + + "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" + + "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" + + "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" + + "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" + + "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" + + "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" + + "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" + + "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" + + "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" + + "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" + + "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" + + "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" + + "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" + + "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" + + "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" + + "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" + + "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" + + "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"", + "b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4"); + sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternStartsWith.match(), 1); // should match + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"", + "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); + sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternStartsWith.match(), 0); // should not match + + // non ascii + sb.setLength(0); + sb.append("¤EÃsÃW°ê»Ã®i¶T¤¤¤Ã3¼Ã®i¶TÃU2~~"); + + assertEquals(sqlPatternStartsWith.match(), 0); // should not match + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"", "¤EÃsÃW"); + sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternStartsWith.match(), 1); // should match + + } + + @Test + public void testSqlPatternEndsWith() { + RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH, "", "BCD"); + StringBuffer sb = new StringBuffer("ABCD"); + SqlPatternMatcher sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + + assertEquals(sqlPatternEndsWith.match(), 1); // ABCD should match EndsWith BCD + + sb.setLength(0); + sb.append("ABC"); + assertEquals(sqlPatternEndsWith.match(), 0); // ABC should not match EndsWith BCD + + sb.setLength(0); + assertEquals(sqlPatternEndsWith.match(), 0); // null string should not match EndsWith BCD + + sb.append("A"); + assertEquals(sqlPatternEndsWith.match(), 0); // ABCD should not match EndsWith A + + sb.setLength(0); + sb.append("XYZBCD"); + assertEquals(sqlPatternEndsWith.match(), 1); // XYZBCD should match EndsWith BCD + + // EndsWith null pattern should match anything + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH,"", ""); + sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternEndsWith.match(), 1); // AB should match StartsWith null pattern + + // null pattern and null text + sb.setLength(0); + assertEquals(sqlPatternEndsWith.match(), 1); // null text should match null pattern + + // wide character string. + sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" + + "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" + + "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" + + "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" + + " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" + + "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" + + "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" + + "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" + + "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" + + "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" + + "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" + + "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" + + "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" + + "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" + + "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" + + "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" + + "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" + + "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" + + "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" + + "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" + + "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" + + "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" + + "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" + + "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" + + "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" + + "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" + + "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" + + "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" + + "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" + + "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" + + "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" + + "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" + + "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" + + "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" + + "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" + + "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" + + "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" + + "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" + + "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" + + "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" + + "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" + + "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" + + "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" + + "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" + + "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" + + "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" + + "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" + + "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" + + "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" + + "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" + + "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" + + "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" + + "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" + + "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" + + "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" + + "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" + + "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" + + "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" + + "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" + + "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" + + "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" + + "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH,"", + "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); + sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternEndsWith.match(), 1); // should match + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH,"", + "atXzj4x3CgF6j1gn10aUJknF7KQLJ84D"); + sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternEndsWith.match(), 0); // should not match + + // non ascii + sb.setLength(0); + sb.append("¤EÃsÃW°ê»Ã®i¶T¤¤¤Ã3¼Ã®i¶TÃU2~~"); + + assertEquals(sqlPatternEndsWith.match(), 0); // should not match + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH,"", "TÃU2~~"); + sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternEndsWith.match(), 1); // should match + + } + + @Test + public void testSqlPatternContains() { + RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,".*ABC.*", "ABCD"); + + StringBuffer sb = new StringBuffer("ABCD"); + SqlPatternMatcher sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + + assertEquals(1, sqlPatternContains.match()); // ABCD should contain ABCD + + sb.setLength(0); + sb.append("BC"); + assertEquals(0, sqlPatternContains.match()); // ABCD should contain BC + + sb.setLength(0); + assertEquals(0, sqlPatternContains.match()); // null string should not match contains ABCD + + sb.append("DE"); + assertEquals(0, sqlPatternContains.match()); // ABCD should not contain DE + + sb.setLength(0); + sb.append("xyzABCDqrs"); + assertEquals(1, sqlPatternContains.match()); // xyzABCDqrs should contain ABCD + + // contains null pattern should match anything + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"", ""); + sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternContains.match(), 1); // should match + + // null pattern and null text + sb.setLength(0); + assertEquals(sqlPatternContains.match(), 1); // null text should match null pattern + + // wide character string. + sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" + + "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" + + "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" + + "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" + + " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" + + "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" + + "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" + + "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" + + "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" + + "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" + + "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" + + "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" + + "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" + + "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" + + "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" + + "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" + + "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" + + "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" + + "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" + + "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" + + "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" + + "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" + + "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" + + "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" + + "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" + + "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" + + "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" + + "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" + + "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" + + "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" + + "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" + + "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" + + "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" + + "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" + + "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" + + "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" + + "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" + + "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" + + "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" + + "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" + + "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" + + "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" + + "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" + + "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" + + "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" + + "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" + + "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" + + "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" + + "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" + + "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" + + "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" + + "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" + + "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" + + "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" + + "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" + + "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" + + "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" + + "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" + + "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" + + "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" + + "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" + + "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"", + "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW"); + sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternContains.match(), 1); + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"", + "ABCDEF"); + sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternContains.match(), 0); + + // non ascii + sb.setLength(0); + sb.append("¤EÃsÃW°ê»Ã®i¶T¤¤¤Ã3¼Ã®i¶TÃU2~~"); + + assertEquals(sqlPatternContains.match(), 0); // should not match + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"", "¶T¤¤¤Ã"); + sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternContains.match(), 1); // should match + + } + + @Test + public void testSqlPatternConstant() { + RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT,"ABC.*", "ABC"); + + StringBuffer sb = new StringBuffer("ABC"); + SqlPatternMatcher sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + + assertEquals(1, sqlPatternContains.match()); // ABC should match ABC + + sb.setLength(0); + sb.append("BC"); + assertEquals(0, sqlPatternContains.match()); // ABC not same as BC + + sb.setLength(0); + assertEquals(0, sqlPatternContains.match()); // null string not same as ABC + + sb.append("DE"); + assertEquals(0, sqlPatternContains.match()); // ABC not same as DE + + // null pattern should match null string + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT,"", ""); + sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + sb.setLength(0); + assertEquals(sqlPatternContains.match(), 1); // null text should match null pattern + + // wide character string. + sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" + + "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" + + "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" + + "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" + + " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" + + "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" + + "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" + + "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" + + "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" + + "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" + + "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" + + "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" + + "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" + + "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" + + "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" + + "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" + + "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" + + "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" + + "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" + + "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" + + "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" + + "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" + + "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" + + "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" + + "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" + + "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" + + "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" + + "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" + + "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" + + "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" + + "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" + + "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" + + "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" + + "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" + + "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" + + "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" + + "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" + + "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" + + "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" + + "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" + + "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" + + "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" + + "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" + + "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" + + "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" + + "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" + + "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" + + "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" + + "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" + + "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" + + "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" + + "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" + + "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" + + "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" + + "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" + + "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" + + "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" + + "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" + + "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" + + "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" + + "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" + + "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"", sb.toString()); + sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + + assertEquals(sqlPatternContains.match(), 1); + + // non ascii + sb.setLength(0); + sb.append("¤EÃsÃW°ê»Ã®i¶T¤¤¤Ã3¼Ã®i¶TÃU2~~"); + + assertEquals(sqlPatternContains.match(), 0); // should not match + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT,"", "¤EÃsÃW°ê»Ã®i¶T¤¤¤Ã3¼Ã®i¶TÃU2~~"); + sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternContains.match(), 1); // should match + + } + + @Test + public void testSqlPatternNotSimple() { + RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX,"A.*BC.*", ""); + + StringBuffer sb = new StringBuffer("ADEBCDF"); + SqlPatternMatcher sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + + assertEquals(sqlPatternComplex.match(), 1); // ADEBCDF should match A.*BC.* + + sb.setLength(0); + sb.append("BC"); + assertEquals(sqlPatternComplex.match(), 0); // BC should not match A.*BC.* + + sb.setLength(0); + assertEquals(sqlPatternComplex.match(), 0); // null string should not match + + sb.append("DEFGHIJ"); + assertEquals(sqlPatternComplex.match(), 0); // DEFGHIJ should not match A.*BC.* + + java.util.regex.Matcher matcher; + matcher = java.util.regex.Pattern.compile("b00dUrA0.*").matcher(""); + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX,"b00dUrA0.*42.*9a8BZ", ""); + sb.setLength(0); + // wide character string. + sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" + + "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" + + "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" + + "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" + + " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" + + "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" + + "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" + + "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" + + "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" + + "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" + + "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" + + "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" + + "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" + + "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" + + "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" + + "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" + + "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" + + "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" + + "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" + + "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" + + "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" + + "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" + + "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" + + "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" + + "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" + + "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" + + "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" + + "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" + + "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" + + "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" + + "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" + + "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" + + "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" + + "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" + + "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" + + "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" + + "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" + + "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" + + "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" + + "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" + + "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" + + "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" + + "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" + + "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" + + "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" + + "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" + + "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" + + "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" + + "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" + + "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" + + "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" + + "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" + + "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" + + "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" + + "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" + + "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" + + "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" + + "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" + + "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" + + "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" + + "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" + + "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); + + sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternComplex.match(), 1); + + // non ascii + sb.setLength(0); + sb.append("¤EÃsÃW°ê»Ã®i¶T¤¤¤Ã3¼Ã®i¶TÃU2~~"); + + assertEquals(sqlPatternComplex.match(), 0); // should not match + + patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX,".*»Ã®i¶T¤¤¤.*¼Ã®i.*ÃU2~~", ""); + sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb); + assertEquals(sqlPatternComplex.match(), 1); // should match + + } + +}