[ 
https://issues.apache.org/jira/browse/PHOENIX-1287?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14363871#comment-14363871
 ] 

ASF GitHub Bot commented on PHOENIX-1287:
-----------------------------------------

Github user JamesRTaylor commented on a diff in the pull request:

    https://github.com/apache/phoenix/pull/46#discussion_r26525162
  
    --- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/expression/util/regex/JONIRegexWrapper.java
 ---
    @@ -0,0 +1,181 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.phoenix.expression.util.regex;
    +
    +import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    +import org.apache.phoenix.schema.SortOrder;
    +import org.jcodings.Encoding;
    +import org.jcodings.specific.UTF8Encoding;
    +import org.joni.Matcher;
    +import org.joni.Option;
    +import org.joni.Regex;
    +
    +import com.google.common.base.Preconditions;
    +
    +public class JONIRegexWrapper {
    +
    +    private static final Encoding PVARCHAR_ENCODING = 
UTF8Encoding.INSTANCE;
    +
    +    static class JONIPattern extends AbstractBasePattern {
    +
    +        private final Regex pattern;
    +        private boolean isLastMatcherStringNull;
    +        private final String patternString;
    +
    +        JONIPattern(String patternString) {
    +            this(patternString, 0);
    +        }
    +
    +        JONIPattern(String patternString, int flags) {
    +            this.patternString = patternString;
    +            if (patternString != null) {
    +                patternString = replacePatternQuote(patternString);
    +                byte[] patternBytes = patternString.getBytes();
    +                pattern = new Regex(patternBytes, 0, patternBytes.length, 
flags, PVARCHAR_ENCODING);
    +            } else {
    +                pattern = null;
    +            }
    +            isLastMatcherStringNull = false;
    +        }
    +
    +        public enum ReplaceQuoteMachine {
    +            STOP, ZERO, ONE_BACKSLASH, IN_QUOTE, IN_QUOTE_ONE_BACKSLASH;
    +
    +            ReplaceQuoteMachine next(StringBuilder sb, char ch) {
    +                switch (this) {
    +                case ZERO:
    +                    switch (ch) {
    +                    case '\\':
    +                        return ONE_BACKSLASH;
    +                    default:
    +                        sb.append(ch);
    +                        return ZERO;
    +                    }
    +                case ONE_BACKSLASH:
    +                    switch (ch) {
    +                    case 'Q':
    +                        return IN_QUOTE;
    +                    default:
    +                        sb.append('\\');
    +                        return ZERO.next(sb, ch);
    +                    }
    +                case IN_QUOTE:
    +                    // add backslashes for .^$*+?()[{\|
    +                    switch (ch) {
    +                    case '.':
    +                    case '^':
    +                    case '$':
    +                    case '*':
    +                    case '+':
    +                    case '?':
    +                    case '(':
    +                    case ')':
    +                    case '[':
    +                    case '{':
    +                    case '|':
    +                        sb.append('\\').append(ch);
    +                        return IN_QUOTE;
    +                    case '\\':
    +                        return IN_QUOTE_ONE_BACKSLASH;
    +                    default:
    +                        sb.append(ch);
    +                        return IN_QUOTE;
    +                    }
    +                case IN_QUOTE_ONE_BACKSLASH:
    +                    switch (ch) {
    +                    case 'E':
    +                        return ZERO;
    +                    default:
    +                        sb.append('\\');
    +                        return IN_QUOTE.next(sb, ch);
    +                    }
    +                case STOP:
    +                default:
    +                    throw new IllegalArgumentException();
    +                }
    +            }
    +
    +            ReplaceQuoteMachine EOF(StringBuilder sb) {
    +                switch (this) {
    +                case ONE_BACKSLASH:
    +                case IN_QUOTE_ONE_BACKSLASH:
    +                    sb.append('\\');
    +                case ZERO:
    +                case IN_QUOTE:
    +                    return STOP;
    +                case STOP:
    +                default:
    +                    throw new IllegalArgumentException();
    +                }
    +            }
    +        }
    +
    +        private String replacePatternQuote(String patternString) {
    +            StringBuilder sb = new StringBuilder();
    +            ReplaceQuoteMachine cur = ReplaceQuoteMachine.ZERO;
    +            for (int i = 0; i < patternString.length(); ++i) {
    +                cur = cur.next(sb, patternString.charAt(i));
    +            }
    +            cur = cur.EOF(sb);
    +            return sb.toString();
    +        }
    +
    +        @Override
    +        public AbstractBaseMatcher macher(ImmutableBytesWritable ptr, 
SortOrder sortOrder) {
    +            Preconditions.checkNotNull(ptr);
    +            Preconditions.checkNotNull(sortOrder);
    +            byte[] matcherSourceBytes = 
Utils.immutableBytesWritableToBytes(ptr, sortOrder);
    --- End diff --
    
    I hope there's a pattern.matcher(byte[] buf, int offset) method we can use 
below instead, as this call to Utils.immutableBytesWritableToBytes() will cause 
a copy of the underlying byte[].
    
    If the copy is unavoidable, can you use ByteUtil.copyKeyBytesIfNecessary() 
instead, as it only copies the bytes when necessary? I think this logic may be 
able to be pulled out of here and into LikeExpression.evaluate, as it should be 
the same in both cases.


> Use the joni byte[] regex engine in place of j.u.regex
> ------------------------------------------------------
>
>                 Key: PHOENIX-1287
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-1287
>             Project: Phoenix
>          Issue Type: Bug
>            Reporter: James Taylor
>            Assignee: Shuxiong Ye
>              Labels: gsoc2015
>
> See HBASE-11907. We'd get a 2x perf benefit plus it's driven off of byte[] 
> instead of strings. Thanks for the pointer, [~apurtell].



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to