Yingyi Bu has submitted this change and it was merged.

Change subject: Add string functions repeat and split.
......................................................................


Add string functions repeat and split.

Change-Id: Ib9de5a59807d5ff51fa5d72444053f87cf8dd289
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1141
Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <ti...@apache.org>
---
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm
A 
asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
R 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
A 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java
A 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java
A 
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java
A 
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java
M 
hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
14 files changed, 536 insertions(+), 17 deletions(-)

Approvals:
  Till Westmann: Looks good to me, approved
  Jenkins: Verified; Verified

Objections:
  Jenkins: Violations found



diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
index a1746cc..27454e3 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
@@ -222,6 +222,8 @@
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionWithFlagDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceWithFlagsDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringRepeatDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringSplitDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringStartsWithDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringToCodePointDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringTrim2Descriptor;
@@ -513,6 +515,8 @@
         functionsToInjectUnkownHandling.add(StringLTrim2Descriptor.FACTORY);
         functionsToInjectUnkownHandling.add(StringRTrim2Descriptor.FACTORY);
         functionsToInjectUnkownHandling.add(StringPositionDescriptor.FACTORY);
+        functionsToInjectUnkownHandling.add(StringRepeatDescriptor.FACTORY);
+        functionsToInjectUnkownHandling.add(StringSplitDescriptor.FACTORY);
 
         // Constructors
         
functionsToInjectUnkownHandling.add(ABooleanConstructorDescriptor.FACTORY);
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp
new file mode 100644
index 0000000..17904c7
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+  "a": repeat(" new ", 2),
+  "b": repeat(" abcx ", 0),
+  "c": repeat("", 2),
+  "d": repeat(null, 2),
+  "e": repeat("asc", null),
+  "f": repeat(missing, 2),
+  "g": repeat("asc", missing),
+  "h": repeat(null, null),
+  "i": repeat(missing, missing)
+};
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp
new file mode 100644
index 0000000..0701ce3
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+SELECT VALUE repeat(" new ", -1);
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp
new file mode 100644
index 0000000..3fb2307
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+  'a': split("abc", "b"),
+  'b': split("abc", "abc"),
+  'c': split("abc", "x"),
+  'd': split("abc", "a"),
+  'e': split("abc", "bc"),
+  'f': split("abc", ""),
+  'g': split("", ""),
+  'h': split("", "abc"),
+  'i': split("", null),
+  'j': split(null, "a"),
+  'k': split("a", missing),
+  'l': split(missing, 'a'),
+  'm': split(null, missing),
+  'n': split(null, null),
+  'o': split(missing, missing)
+};
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm
new file mode 100644
index 0000000..7b8b7fc
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm
@@ -0,0 +1 @@
+{ "a": " new  new ", "b": "", "c": "", "d": null, "e": null, "h": null }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm
new file mode 100644
index 0000000..3de2947
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm
@@ -0,0 +1 @@
+{ "a": [ "a", "c" ], "b": [ "", "" ], "c": [ "abc" ], "d": [ "", "bc" ], "e": 
[ "a", "" ], "f": [ "a", "b", "c" ], "g": [  ], "h": [ "" ], "i": null, "j": 
null, "n": null }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 01a036c..fb9a8e8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -5351,6 +5351,17 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="string">
+      <compilation-unit name="repeat">
+        <output-dir compare="Text">repeat</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
+      <compilation-unit name="repeat_error">
+        <output-dir compare="Text">repeat</output-dir>
+        <expected-error>repeat: expects a non-negative repeating number but 
got -1</expected-error>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
       <compilation-unit name="regexp_replace">
         <output-dir compare="Text">replace22</output-dir>
       </compilation-unit>
@@ -5386,6 +5397,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="string">
+      <compilation-unit name="split">
+        <output-dir compare="Text">split</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
       <compilation-unit name="start-with1">
         <output-dir compare="Text">start-with1</output-dir>
       </compilation-unit>
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
index dc2412c..da6ee02 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
@@ -78,7 +78,6 @@
 import org.apache.asterix.om.typecomputer.impl.OpenRecordConstructorResultType;
 import 
org.apache.asterix.om.typecomputer.impl.OrderedListConstructorTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.OrderedListOfAInt32TypeComputer;
-import org.apache.asterix.om.typecomputer.impl.OrderedListOfAInt64TypeComputer;
 import 
org.apache.asterix.om.typecomputer.impl.OrderedListOfAIntervalTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.OrderedListOfAPointTypeComputer;
 import 
org.apache.asterix.om.typecomputer.impl.OrderedListOfAStringTypeComputer;
@@ -90,9 +89,11 @@
 import 
org.apache.asterix.om.typecomputer.impl.ScalarVersionOfAggregateResultType;
 import org.apache.asterix.om.typecomputer.impl.StringBooleanTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.StringInt32TypeComputer;
+import org.apache.asterix.om.typecomputer.impl.StringIntToStringTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.StringStringTypeComputer;
+import org.apache.asterix.om.typecomputer.impl.StringToInt64ListTypeComputer;
+import org.apache.asterix.om.typecomputer.impl.StringToStringListTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.SubsetCollectionTypeComputer;
-import org.apache.asterix.om.typecomputer.impl.Substring2TypeComputer;
 import org.apache.asterix.om.typecomputer.impl.SubstringTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.SwitchCaseComputer;
 import org.apache.asterix.om.typecomputer.impl.UnaryBinaryInt64TypeComputer;
@@ -310,6 +311,10 @@
             "string-concat", 1);
     public static final FunctionIdentifier STRING_JOIN = new 
FunctionIdentifier(FunctionConstants.ASTERIX_NS,
             "string-join", 2);
+    public static final FunctionIdentifier STRING_REPEAT = new 
FunctionIdentifier(FunctionConstants.ASTERIX_NS,
+            "repeat", 2);
+    public static final FunctionIdentifier STRING_SPLIT = new 
FunctionIdentifier(FunctionConstants.ASTERIX_NS, "split",
+            2);
 
     public static final FunctionIdentifier DATASET = new 
FunctionIdentifier(FunctionConstants.ASTERIX_NS, "dataset", 1);
     public static final FunctionIdentifier FEED_COLLECT = new 
FunctionIdentifier(FunctionConstants.ASTERIX_NS,
@@ -884,10 +889,10 @@
 
         addFunction(STRING_LIKE, BooleanFunctionTypeComputer.INSTANCE, true);
         addFunction(STRING_CONTAINS, ABooleanTypeComputer.INSTANCE, true);
-        addFunction(STRING_TO_CODEPOINT, 
OrderedListOfAInt64TypeComputer.INSTANCE, true);
+        addFunction(STRING_TO_CODEPOINT, 
StringToInt64ListTypeComputer.INSTANCE, true);
         addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true);
         addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true);
-        addFunction(SUBSTRING2, Substring2TypeComputer.INSTANCE, true);
+        addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE, true);
         addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, 
true);
         addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true);
         addFunction(STRING_UPPERCASE, StringStringTypeComputer.INSTANCE, true);
@@ -913,6 +918,8 @@
         addFunction(SUBSTRING_AFTER, StringStringTypeComputer.INSTANCE, true);
         addPrivateFunction(STRING_EQUAL, StringBooleanTypeComputer.INSTANCE, 
true);
         addFunction(STRING_JOIN, AStringTypeComputer.INSTANCE, true);
+        addFunction(STRING_REPEAT, StringIntToStringTypeComputer.INSTANCE, 
true);
+        addFunction(STRING_SPLIT, StringToStringListTypeComputer.INSTANCE, 
true);
 
         addPrivateFunction(ORDERED_LIST_CONSTRUCTOR, 
OrderedListConstructorTypeComputer.INSTANCE, true);
         addFunction(POINT_CONSTRUCTOR, APointTypeComputer.INSTANCE, true);
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
similarity index 91%
rename from 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java
rename to 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
index e2e812e..7bb83d0 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
@@ -25,8 +25,8 @@
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 
-public class Substring2TypeComputer extends AbstractResultTypeComputer {
-    public static final Substring2TypeComputer INSTANCE = new 
Substring2TypeComputer();
+public class StringIntToStringTypeComputer extends AbstractResultTypeComputer {
+    public static final StringIntToStringTypeComputer INSTANCE = new 
StringIntToStringTypeComputer();
 
     @Override
     public void checkArgType(int argIndex, IAType type) throws 
AlgebricksException {
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java
new file mode 100644
index 0000000..b01ac71
--- /dev/null
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.om.typecomputer.impl;
+
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+
+public class StringToInt64ListTypeComputer extends AbstractStringTypeComputer {
+
+    public static final StringToInt64ListTypeComputer INSTANCE = new 
StringToInt64ListTypeComputer();
+
+    private StringToInt64ListTypeComputer() {
+    }
+
+    @Override
+    protected IAType getResultType(ILogicalExpression expr, IAType... 
strippedInputTypes) throws AlgebricksException {
+        return new AOrderedListType(BuiltinType.AINT64, null);
+    }
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java
new file mode 100644
index 0000000..4891330
--- /dev/null
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.om.typecomputer.impl;
+
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+
+public class StringToStringListTypeComputer extends AbstractStringTypeComputer 
{
+
+    public static final StringToStringListTypeComputer INSTANCE = new 
StringToStringListTypeComputer();
+
+    private StringToStringListTypeComputer() {
+    }
+
+    @Override
+    protected IAType getResultType(ILogicalExpression expr, IAType... 
strippedInputTypes) throws AlgebricksException {
+        return new AOrderedListType(BuiltinType.ASTRING, null);
+    }
+}
\ No newline at end of file
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java
new file mode 100644
index 0000000..0f4c0de
--- /dev/null
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.asterix.om.functions.AsterixBuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptor;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.EnumDeserializer;
+import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
+import 
org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class StringRepeatDescriptor extends 
AbstractScalarFunctionDynamicDescriptor {
+    private static final long serialVersionUID = 1L;
+    public static final IFunctionDescriptorFactory FACTORY = new 
IFunctionDescriptorFactory() {
+        @Override
+        public IFunctionDescriptor createFunctionDescriptor() {
+            return new StringRepeatDescriptor();
+        }
+    };
+
+    @Override
+    public IScalarEvaluatorFactory createEvaluatorFactory(final 
IScalarEvaluatorFactory[] args)
+            throws AlgebricksException {
+        return new IScalarEvaluatorFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public IScalarEvaluator createScalarEvaluator(final 
IHyracksTaskContext ctx) throws AlgebricksException {
+                return new IScalarEvaluator() {
+                    // Argument evaluators.
+                    private IScalarEvaluator evalString = 
args[0].createScalarEvaluator(ctx);
+                    private IScalarEvaluator evalStart = 
args[1].createScalarEvaluator(ctx);
+
+                    // Argument pointers.
+                    private IPointable argString = new VoidPointable();
+                    private IPointable argNumber = new VoidPointable();
+
+                    // For outputting the result.
+                    private ArrayBackedValueStorage resultStorage = new 
ArrayBackedValueStorage();
+                    private DataOutput out = resultStorage.getDataOutput();
+                    private byte[] tempLengthArray = new byte[5];
+
+                    @Override
+                    public void evaluate(IFrameTupleReference tuple, 
IPointable result) throws AlgebricksException {
+                        resultStorage.reset();
+
+                        // Calls argument evaluators.
+                        evalStart.evaluate(tuple, argNumber);
+                        evalString.evaluate(tuple, argString);
+
+                        // Gets the repeating times.
+                        int repeatingTimes = 0;
+                        byte[] bytes = argNumber.getByteArray();
+                        int offset = argNumber.getStartOffset();
+                        try {
+                            repeatingTimes = 
ATypeHierarchy.getIntegerValue(bytes, offset);
+                        } catch (HyracksDataException e1) {
+                            throw new AlgebricksException(e1);
+                        }
+                        // Checks repeatingTimes. It should be a non-negative 
value.
+                        if (repeatingTimes < 0) {
+                            throw new 
AlgebricksException(StringRepeatDescriptor.this.getIdentifier().getName()
+                                    + ": expects a non-negative repeating 
number but got " + repeatingTimes + ".");
+                        }
+
+                        // Gets the input string.
+                        bytes = argString.getByteArray();
+                        offset = argString.getStartOffset();
+                        // Checks the type of the string argument.
+                        if (bytes[offset] != 
ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
+                            throw new 
AlgebricksException(StringRepeatDescriptor.this.getIdentifier().getName()
+                                    + ": expects type STRING for the first 
argument but got "
+                                    + 
EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[offset]));
+                        }
+
+                        // Calculates the result string length.
+                        int inputLen = UTF8StringUtil.getUTFLength(bytes, 
offset + 1);
+                        int resultLen = Math.multiplyExact(inputLen, 
repeatingTimes); // Can throw overflow exception.
+                        int cbytes = 
UTF8StringUtil.encodeUTF8Length(resultLen, tempLengthArray, 0);
+
+                        // Writes the output string.
+                        int inputStringStart = offset + 1 + 
UTF8StringUtil.getNumBytesToStoreLength(inputLen);
+                        try {
+                            out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+                            out.write(tempLengthArray, 0, cbytes);
+                            for (int numRepeats = 0; numRepeats < 
repeatingTimes; ++numRepeats) {
+                                out.write(bytes, inputStringStart, inputLen);
+                            }
+                        } catch (IOException e) {
+                            throw new AlgebricksException(e);
+                        }
+                        result.set(resultStorage);
+                    }
+                };
+            }
+        };
+    }
+
+    @Override
+    public FunctionIdentifier getIdentifier() {
+        return AsterixBuiltinFunctions.STRING_REPEAT;
+    }
+
+}
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java
new file mode 100644
index 0000000..8cd3a5b
--- /dev/null
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.asterix.builders.OrderedListBuilder;
+import org.apache.asterix.om.functions.AsterixBuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptor;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.EnumDeserializer;
+import 
org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class StringSplitDescriptor extends 
AbstractScalarFunctionDynamicDescriptor {
+
+    private static final long serialVersionUID = 1L;
+
+    public static final IFunctionDescriptorFactory FACTORY = new 
IFunctionDescriptorFactory() {
+        @Override
+        public IFunctionDescriptor createFunctionDescriptor() {
+            return new StringSplitDescriptor();
+        }
+    };
+
+    @Override
+    public IScalarEvaluatorFactory createEvaluatorFactory(final 
IScalarEvaluatorFactory[] args) {
+        return new IScalarEvaluatorFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public IScalarEvaluator createScalarEvaluator(final 
IHyracksTaskContext ctx) throws AlgebricksException {
+                return new IScalarEvaluator() {
+                    // Argument evaluators.
+                    private final IScalarEvaluator stringEval = 
args[0].createScalarEvaluator(ctx);
+                    private final IScalarEvaluator patternEval = 
args[1].createScalarEvaluator(ctx);
+
+                    // Argument pointers.
+                    private final IPointable argString = new VoidPointable();
+                    private final IPointable argPattern = new VoidPointable();
+                    private final UTF8StringPointable argStrPtr = new 
UTF8StringPointable();
+                    private final UTF8StringPointable argPatternPtr = new 
UTF8StringPointable();
+
+                    // For an output string item.
+                    private final ArrayBackedValueStorage itemStorge = new 
ArrayBackedValueStorage();
+                    private final DataOutput itemOut = 
itemStorge.getDataOutput();
+                    private final byte[] tempLengthArray = new byte[5];
+
+                    // For the output list of strings.
+                    private final AOrderedListType intListType = new 
AOrderedListType(BuiltinType.ASTRING, null);
+                    private final OrderedListBuilder listBuilder = new 
OrderedListBuilder();
+                    private final ArrayBackedValueStorage resultStorage = new 
ArrayBackedValueStorage();
+                    private final DataOutput out = 
resultStorage.getDataOutput();
+
+                    @Override
+                    public void evaluate(IFrameTupleReference tuple, 
IPointable result) throws AlgebricksException {
+                        try {
+                            resultStorage.reset();
+                            // Calls argument evaluators.
+                            stringEval.evaluate(tuple, argString);
+                            patternEval.evaluate(tuple, argPattern);
+
+                            // Gets the bytes of the source string.
+                            byte[] srcString = argString.getByteArray();
+                            int srcOffset = argString.getStartOffset();
+                            int srcLen = argString.getLength();
+                            // Type check for the first argument.
+                            if (srcString[srcOffset] != 
ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
+                                throw new 
AlgebricksException(StringSplitDescriptor.this.getIdentifier().getName()
+                                        + ": expects input type STRING for the 
first argument but got "
+                                        + 
EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(srcString[srcOffset]));
+                            }
+
+                            // Gets the bytes of the pattern string.
+                            byte[] patternString = argPattern.getByteArray();
+                            int patternOffset = argPattern.getStartOffset();
+                            int patternLen = argPattern.getLength();
+                            // Type check for the second argument.
+                            if (patternString[patternOffset] != 
ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
+                                throw new 
AlgebricksException(StringSplitDescriptor.this.getIdentifier().getName()
+                                        + ": expects input type STRING for the 
second argument but got "
+                                        + EnumDeserializer.ATYPETAGDESERIALIZER
+                                                
.deserialize(patternString[patternOffset]));
+                            }
+
+                            // Sets the UTF8 String pointables.
+                            argStrPtr.set(srcString, srcOffset + 1, srcLen - 
1);
+                            argPatternPtr.set(patternString, patternOffset + 
1, patternLen - 1);
+
+                            // Gets the string length of the source string.
+                            int inputStringLen = 
UTF8StringUtil.getUTFLength(srcString, srcOffset + 1);
+                            int inputStringStart = srcOffset + 1
+                                    + 
UTF8StringUtil.getNumBytesToStoreLength(inputStringLen);
+                            // Gets the string length of the pattern string.
+                            int inputPatternLen = 
UTF8StringUtil.getUTFLength(patternString, patternOffset + 1);
+                            // Handles the case that the pattern is "".
+                            boolean emptyStringPattern = inputPatternLen == 0;
+
+                            // Builds a list of strings.
+                            listBuilder.reset(intListType);
+                            int itemStrStart = 0;
+                            int nextMatchStart;
+                            while (itemStrStart < inputStringLen && 
(nextMatchStart = UTF8StringPointable
+                                    .find(argStrPtr, argPatternPtr, false, 
itemStrStart)) >= 0) {
+                                // Adds an item string.
+                                addItemString(srcString, inputStringStart, 
itemStrStart,
+                                        emptyStringPattern ? nextMatchStart + 
1 : nextMatchStart);
+                                itemStrStart = nextMatchStart + 
(emptyStringPattern ? 1 : inputPatternLen);
+                            }
+                            if (!emptyStringPattern) {
+                                addItemString(srcString, inputStringStart, 
itemStrStart, inputStringLen);
+                            }
+                            listBuilder.write(out, true);
+                            result.set(resultStorage);
+                        } catch (IOException e1) {
+                            throw new AlgebricksException(e1);
+                        }
+                    }
+
+                    private void addItemString(byte[] srcString, int 
inputStringStart, int itemStartOffset,
+                            int nextMatchStart) throws IOException {
+                        int itemLen = nextMatchStart - itemStartOffset;
+                        int cbytes = UTF8StringUtil.encodeUTF8Length(itemLen, 
tempLengthArray, 0);
+                        itemStorge.reset();
+                        itemOut.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+                        itemOut.write(tempLengthArray, 0, cbytes);
+                        if (itemLen > 0) {
+                            itemOut.write(srcString, inputStringStart + 
itemStartOffset, itemLen);
+                        }
+                        listBuilder.addItem(itemStorge);
+                    }
+                };
+            }
+        };
+    }
+
+    @Override
+    public FunctionIdentifier getIdentifier() {
+        return AsterixBuiltinFunctions.STRING_SPLIT;
+    }
+
+}
diff --git 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 8592bd2..0850b04 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -178,23 +178,40 @@
     }
 
     /**
-     * return the byte offset of the first character of the matching string. 
Not including the MetaLength
-     *
-     * @param src
-     * @param pattern
-     * @param ignoreCase
-     * @return
+     * @param src
+     *            the source string.
+     * @param pattern
+     *            the pattern string.
+     * @param ignoreCase
+     *            whether to ignore case when matching.
+     * @return the byte offset of the first character of the matching string. 
Not including the MetaLength.
      */
     public static int find(UTF8StringPointable src, UTF8StringPointable 
pattern, boolean ignoreCase) {
+        return find(src, pattern, ignoreCase, 0);
+    }
+
+    /**
+     * @param src
+     *            the source string.
+     * @param pattern
+     *            the pattern string.
+     * @param ignoreCase
+     *            whether to ignore case when matching.
+     * @param startMatch
+     *            the byte offset in the source string at which to start searching.
+     * @return the byte offset of the first character of the first match found 
at or after <code>startMatch</code>,
+     *         not including the MetaLength; -1 if there is no such match.
+     */
+    public static int find(UTF8StringPointable src, UTF8StringPointable 
pattern, boolean ignoreCase, int startMatch) {
+        int startMatchPos = startMatch;
         final int srcUtfLen = src.getUTF8Length();
         final int pttnUtfLen = pattern.getUTF8Length();
         final int srcStart = src.getMetaDataLength();
         final int pttnStart = pattern.getMetaDataLength();
 
-        int startMatch = 0;
         int maxStart = srcUtfLen - pttnUtfLen;
-        while (startMatch <= maxStart) {
-            int c1 = startMatch;
+        while (startMatchPos <= maxStart) {
+            int c1 = startMatchPos;
             int c2 = 0;
             while (c1 < srcUtfLen && c2 < pttnUtfLen) {
                 char ch1 = src.charAt(srcStart + c1);
@@ -209,9 +226,9 @@
                 c2 += pattern.charSize(pttnStart + c2);
             }
             if (c2 == pttnUtfLen) {
-                return startMatch;
+                return startMatchPos;
             }
-            startMatch += src.charSize(srcStart + startMatch);
+            startMatchPos += src.charSize(srcStart + startMatchPos);
         }
         return -1;
     }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/1141
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib9de5a59807d5ff51fa5d72444053f87cf8dd289
Gerrit-PatchSet: 4
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Yingyi Bu <buyin...@gmail.com>
Gerrit-Reviewer: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-Reviewer: Yingyi Bu <buyin...@gmail.com>

Reply via email to