Repository: drill
Updated Branches:
  refs/heads/master a07f4de7e -> 8176fbca6
DRILL-4573: Zero copy LIKE, REGEXP_MATCHES, SUBSTR

Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/8176fbca
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/8176fbca
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/8176fbca

Branch: refs/heads/master
Commit: 8176fbca6435ea2417a77b99e954ae10ca4bbdc4
Parents: a07f4de
Author: jean-claude cote <[email protected]>
Authored: Fri Apr 1 23:37:00 2016 -0400
Committer: Hsuan-Yi Chu <[email protected]>
Committed: Mon Apr 25 09:47:07 2016 -0700

----------------------------------------------------------------------
 .../exec/expr/fn/impl/CharSequenceWrapper.java | 49 +++++++++++
 .../expr/fn/impl/SimpleRepeatedFunctions.java | 11 ++-
 .../exec/expr/fn/impl/StringFunctions.java | 91 +++++++++++++++-----
 3 files changed, 128 insertions(+), 23 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
new file mode 100644
index 0000000..6c475ed
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +import io.netty.buffer.DrillBuf; + +public class CharSequenceWrapper implements CharSequence { + + private int start; + private int end; + private DrillBuf buffer; + + @Override + public int length() { + return end - start; + } + + @Override + public char charAt(int index) { + return (char) buffer.getByte(start + index); + } + + @Override + public CharSequence subSequence(int start, int end) { + throw new UnsupportedOperationException("Not implemented."); + } + + public void setBuffer(int start, int end, DrillBuf buffer) { + this.start = start; + this.end = end; + this.buffer = buffer; + } + +} http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java index 253c92a..7825289 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java @@ -327,6 +327,7 @@ public class SimpleRepeatedFunctions { @Param VarCharHolder targetValue; @Workspace VarCharHolder currVal; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Output 
BitHolder out; @@ -334,15 +335,19 @@ public class SimpleRepeatedFunctions { currVal = new VarCharHolder(); matcher = java.util.regex.Pattern.compile( org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(targetValue.start, targetValue.end, targetValue.buffer)).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } public void eval() { for (int i = listToSearch.start; i < listToSearch.end; i++) { out.value = 0; listToSearch.vector.getAccessor().get(i, currVal); - String in = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(currVal.start, currVal.end, currVal.buffer); - matcher.reset(in); - if(matcher.find()) { + charSequenceWrapper.setBuffer(currVal.start, currVal.end, currVal.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); + if(matcher.find()) { out.value = 1; break; } http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java index 78de816..0ce1c4e 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java @@ -55,17 +55,22 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder pattern; @Output BitHolder out; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Override public void setup() { matcher = 
java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( // org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { - String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); out.value = matcher.matches()? 1:0; } } @@ -78,18 +83,23 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder escape; @Output BitHolder out; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Override public void setup() { matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( // org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer), org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer))).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { - String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); out.value = matcher.matches()? 
1:0; } } @@ -101,18 +111,23 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder pattern; @Output BitHolder out; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Override public void setup() { matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( // org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)), java.util.regex.Pattern.CASE_INSENSITIVE).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { - String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); out.value = matcher.matches()? 
1:0; } } @@ -125,6 +140,7 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder escape; @Output BitHolder out; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Override public void setup() { @@ -132,12 +148,16 @@ public class StringFunctions{ org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer), org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer)), java.util.regex.Pattern.CASE_INSENSITIVE).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { - String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); out.value = matcher.matches()? 
1:0; } } @@ -148,16 +168,21 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder pattern; @Output BitHolder out; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Override public void setup() { matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { - String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); out.value = matcher.matches()? 
1:0; } } @@ -169,18 +194,23 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder escape; @Output BitHolder out; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Override public void setup() { matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar( org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer), org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer))).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { - String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); out.value = matcher.matches()? 
1:0; } } @@ -196,19 +226,25 @@ public class StringFunctions{ @Param VarCharHolder replacement; @Inject DrillBuf buffer; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Output VarCharHolder out; @Override public void setup() { matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { out.start = 0; - final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); final String r = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(replacement.start, replacement.end, replacement.buffer); - final byte [] bytea = matcher.reset(i).replaceAll(r).getBytes(java.nio.charset.Charset.forName("UTF-8")); + final byte [] bytea = matcher.replaceAll(r).getBytes(java.nio.charset.Charset.forName("UTF-8")); out.buffer = buffer = buffer.reallocIfNeeded(bytea.length); out.buffer.setBytes(out.start, bytea); out.end = bytea.length; @@ -227,17 +263,22 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder pattern; @Inject DrillBuf buffer; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Output BitHolder out; @Override public void setup() { matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher(""); + charSequenceWrapper = 
new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { - final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); out.value = matcher.matches()? 1:0; } } @@ -566,18 +607,23 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder pattern; @Output NullableVarCharHolder out; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Override public void setup() { matcher = java.util.regex.Pattern.compile( org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)) .matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override public void eval() { - final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. 
+ // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); if (matcher.find()) { out.isSet = 1; out.buffer = input.buffer; @@ -593,12 +639,15 @@ public class StringFunctions{ @Param(constant=true) VarCharHolder pattern; @Output NullableVarCharHolder out; @Workspace java.util.regex.Matcher matcher; + @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper; @Override public void setup() { matcher = java.util.regex.Pattern.compile( org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)) .matcher(""); + charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper(); + matcher.reset(charSequenceWrapper); } @Override @@ -606,8 +655,10 @@ public class StringFunctions{ if (input.isSet == 0) { out.isSet = 0; } else { - final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); - matcher.reset(i); + charSequenceWrapper.setBuffer(input.start, input.end, input.buffer); + // Reusing same charSequenceWrapper, no need to pass it in. + // This saves one method call since reset(CharSequence) calls reset() + matcher.reset(); if (matcher.find()) { out.isSet = 1; out.buffer = input.buffer;
