[GitHub] drill pull request #1015: DRILL-5899: Simple pattern matchers can work with ...
Github user asfgit closed the pull request at: https://github.com/apache/drill/pull/1015 ---
[GitHub] drill pull request #1015: DRILL-5899: Simple pattern matchers can work with ...
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/1015#discussion_r149554356 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java --- @@ -17,33 +17,30 @@ */ package org.apache.drill.exec.expr.fn.impl; -public class SqlPatternEndsWithMatcher implements SqlPatternMatcher { - final String patternString; - CharSequence charSequenceWrapper; - final int patternLength; - - public SqlPatternEndsWithMatcher(String patternString, CharSequence charSequenceWrapper) { -this.charSequenceWrapper = charSequenceWrapper; -this.patternString = patternString; -this.patternLength = patternString.length(); +import io.netty.buffer.DrillBuf; + +public class SqlPatternEndsWithMatcher extends AbstractSqlPatternMatcher { + + public SqlPatternEndsWithMatcher(String patternString) { +super(patternString); } @Override - public int match() { -int txtIndex = charSequenceWrapper.length(); -int patternIndex = patternLength; -boolean matchFound = true; // if pattern is empty string, we always match. + public int match(int start, int end, DrillBuf drillBuf) { + +if ( (end - start) < patternLength) { // No match if input string length is less than pattern length. --- End diff -- `( (end - start)` --> `(end - start` ---
[GitHub] drill pull request #1015: DRILL-5899: Simple pattern matchers can work with ...
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/1015#discussion_r149554195 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java --- @@ -17,37 +17,48 @@ */ package org.apache.drill.exec.expr.fn.impl; -public class SqlPatternContainsMatcher implements SqlPatternMatcher { - final String patternString; - CharSequence charSequenceWrapper; - final int patternLength; - - public SqlPatternContainsMatcher(String patternString, CharSequence charSequenceWrapper) { -this.patternString = patternString; -this.charSequenceWrapper = charSequenceWrapper; -patternLength = patternString.length(); +import io.netty.buffer.DrillBuf; + +public class SqlPatternContainsMatcher extends AbstractSqlPatternMatcher { + + public SqlPatternContainsMatcher(String patternString) { +super(patternString); } @Override - public int match() { -final int txtLength = charSequenceWrapper.length(); -int patternIndex = 0; -int txtIndex = 0; - -// simplePattern string has meta characters i.e % and _ and escape characters removed. -// so, we can just directly compare. -while (patternIndex < patternLength && txtIndex < txtLength) { - if (patternString.charAt(patternIndex) != charSequenceWrapper.charAt(txtIndex)) { -// Go back if there is no match -txtIndex = txtIndex - patternIndex; -patternIndex = 0; - } else { -patternIndex++; + public int match(int start, int end, DrillBuf drillBuf) { + +if (patternLength == 0) { // Everything should match for null pattern string + return 1; +} + +final int txtLength = end - start; + +// no match if input string length is less than pattern length +if (txtLength < patternLength) { + return 0; +} + +outer: +for (int txtIndex = 0; txtIndex < txtLength; txtIndex++) { + + // boundary check + if (txtIndex + patternLength > txtLength) { --- End diff -- Better: ``` int end = txtLength - patternLength; for (int txtIndex = 0; txtIndex < end; txtIndex++) { ``` And omit the boundary check on every iteration. That is, no reason to iterate past the last possible match, then use an if-statement to shorten the loop. Just shorten the loop. ---
[GitHub] drill pull request #1015: DRILL-5899: Simple pattern matchers can work with ...
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/1015#discussion_r149552453 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/AbstractSqlPatternMatcher.java --- @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +import com.google.common.base.Charsets; +import org.apache.drill.common.exceptions.UserException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetEncoder; +import static org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.logger; + +// To get good performance for most commonly used pattern matches --- End diff -- Javadoc? ``` /** * This is a Javadoc comment and appears in generated documentation. */ // This is a plain comment and does not appear in documentation. ``` ---
[GitHub] drill pull request #1015: DRILL-5899: Simple pattern matchers can work with ...
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/1015#discussion_r149552506 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/AbstractSqlPatternMatcher.java --- @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +import com.google.common.base.Charsets; +import org.apache.drill.common.exceptions.UserException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetEncoder; +import static org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.logger; + +// To get good performance for most commonly used pattern matches +// i.e. CONSTANT('ABC'), STARTSWITH('%ABC'), ENDSWITH('ABC%') and CONTAINS('%ABC%'), +// we have simple pattern matchers. +// Idea is to have our own implementation for simple pattern matchers so we can +// avoid heavy weight regex processing, skip UTF-8 decoding and char conversion. +// Instead, we encode the pattern string and do byte comparison against native memory. +// Overall, this approach +// gives us orders of magnitude performance improvement for simple pattern matches. +// Anything that is not simple is considered +// complex pattern and we use Java regex for complex pattern matches. + +public abstract class AbstractSqlPatternMatcher implements SqlPatternMatcher { + final String patternString; --- End diff -- `protected final` ---
[GitHub] drill pull request #1015: DRILL-5899: Simple pattern matchers can work with ...
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/1015#discussion_r149555002 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java --- @@ -17,33 +17,30 @@ */ package org.apache.drill.exec.expr.fn.impl; -public class SqlPatternEndsWithMatcher implements SqlPatternMatcher { - final String patternString; - CharSequence charSequenceWrapper; - final int patternLength; - - public SqlPatternEndsWithMatcher(String patternString, CharSequence charSequenceWrapper) { -this.charSequenceWrapper = charSequenceWrapper; -this.patternString = patternString; -this.patternLength = patternString.length(); +import io.netty.buffer.DrillBuf; + +public class SqlPatternEndsWithMatcher extends AbstractSqlPatternMatcher { + + public SqlPatternEndsWithMatcher(String patternString) { +super(patternString); } @Override - public int match() { -int txtIndex = charSequenceWrapper.length(); -int patternIndex = patternLength; -boolean matchFound = true; // if pattern is empty string, we always match. + public int match(int start, int end, DrillBuf drillBuf) { + +if ( (end - start) < patternLength) { // No match if input string length is less than pattern length. + return 0; +} // simplePattern string has meta characters i.e % and _ and escape characters removed. // so, we can just directly compare. -while (patternIndex > 0 && txtIndex > 0) { - if (charSequenceWrapper.charAt(--txtIndex) != patternString.charAt(--patternIndex)) { -matchFound = false; -break; +for (int index = 1; index <= patternLength; index++) { --- End diff -- ``` int txtStart = end - patternLength; if (txtStart < start) { return 0; } for (int index = 0; index < patternLength; index++) { ... patternByteBuffer.get(index) ... drillBuf.getByte(txtStart + index) ... ``` ---