Yingyi Bu has submitted this change and it was merged. Change subject: Add string function repeat and split. ......................................................................
Add string function repeat and split. Change-Id: Ib9de5a59807d5ff51fa5d72444053f87cf8dd289 Reviewed-on: https://asterix-gerrit.ics.uci.edu/1141 Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Till Westmann <ti...@apache.org> --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java R asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java A asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java A asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java M hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java 14 files changed, 536 insertions(+), 17 deletions(-) Approvals: Till Westmann: Looks good to me, approved Jenkins: Verified; Verified Objections: Jenkins: Violations found diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java index a1746cc..27454e3 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java @@ -222,6 +222,8 @@ import org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionWithFlagDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceWithFlagsDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringRepeatDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringSplitDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringStartsWithDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringToCodePointDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringTrim2Descriptor; @@ -513,6 +515,8 @@ functionsToInjectUnkownHandling.add(StringLTrim2Descriptor.FACTORY); functionsToInjectUnkownHandling.add(StringRTrim2Descriptor.FACTORY); functionsToInjectUnkownHandling.add(StringPositionDescriptor.FACTORY); + functionsToInjectUnkownHandling.add(StringRepeatDescriptor.FACTORY); + functionsToInjectUnkownHandling.add(StringSplitDescriptor.FACTORY); // Constructors functionsToInjectUnkownHandling.add(ABooleanConstructorDescriptor.FACTORY); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp new file mode 100644 index 0000000..17904c7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp @@ -0,0 +1,30 @@ 
+/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +{ + "a": repeat(" new ", 2), + "b": repeat(" abcx ", 0), + "c": repeat("", 2), + "d": repeat(null, 2), + "e": repeat("asc", null), + "f": repeat(missing, 2), + "g": repeat("asc", missing), + "h": repeat(null, null), + "i": repeat(missing, missing) +}; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp new file mode 100644 index 0000000..0701ce3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +SELECT VALUE repeat(" new ", -1); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp new file mode 100644 index 0000000..3fb2307 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +{ + 'a': split("abc", "b"), + 'b': split("abc", "abc"), + 'c': split("abc", "x"), + 'd': split("abc", "a"), + 'e': split("abc", "bc"), + 'f': split("abc", ""), + 'g': split("", ""), + 'h': split("", "abc"), + 'i': split("", null), + 'j': split(null, "a"), + 'k': split("a", missing), + 'l': split(missing, 'a'), + 'm': split(null, missing), + 'n': split(null, null), + 'o': split(missing, missing) +}; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm new file mode 100644 index 0000000..7b8b7fc --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm @@ -0,0 +1 @@ +{ "a": " new new ", "b": "", "c": "", "d": null, "e": null, "h": null } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm new file mode 100644 index 0000000..3de2947 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm @@ -0,0 +1 @@ +{ "a": [ "a", "c" ], "b": [ "", "" ], "c": [ "abc" ], "d": [ "", "bc" ], "e": [ "a", "" ], "f": [ "a", "b", "c" ], "g": [ ], "h": [ "" ], "i": null, "j": null, "n": null } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 01a036c..fb9a8e8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -5351,6 +5351,17 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="repeat"> + <output-dir compare="Text">repeat</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="repeat_error"> + <output-dir 
compare="Text">repeat</output-dir> + <expected-error>repeat: expects a non-negative repeating number but got -1</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="regexp_replace"> <output-dir compare="Text">replace22</output-dir> </compilation-unit> @@ -5386,6 +5397,11 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="split"> + <output-dir compare="Text">split</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="start-with1"> <output-dir compare="Text">start-with1</output-dir> </compilation-unit> diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java index dc2412c..da6ee02 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java @@ -78,7 +78,6 @@ import org.apache.asterix.om.typecomputer.impl.OpenRecordConstructorResultType; import org.apache.asterix.om.typecomputer.impl.OrderedListConstructorTypeComputer; import org.apache.asterix.om.typecomputer.impl.OrderedListOfAInt32TypeComputer; -import org.apache.asterix.om.typecomputer.impl.OrderedListOfAInt64TypeComputer; import org.apache.asterix.om.typecomputer.impl.OrderedListOfAIntervalTypeComputer; import org.apache.asterix.om.typecomputer.impl.OrderedListOfAPointTypeComputer; import org.apache.asterix.om.typecomputer.impl.OrderedListOfAStringTypeComputer; @@ -90,9 +89,11 @@ import org.apache.asterix.om.typecomputer.impl.ScalarVersionOfAggregateResultType; import org.apache.asterix.om.typecomputer.impl.StringBooleanTypeComputer; import org.apache.asterix.om.typecomputer.impl.StringInt32TypeComputer; +import 
org.apache.asterix.om.typecomputer.impl.StringIntToStringTypeComputer; import org.apache.asterix.om.typecomputer.impl.StringStringTypeComputer; +import org.apache.asterix.om.typecomputer.impl.StringToInt64ListTypeComputer; +import org.apache.asterix.om.typecomputer.impl.StringToStringListTypeComputer; import org.apache.asterix.om.typecomputer.impl.SubsetCollectionTypeComputer; -import org.apache.asterix.om.typecomputer.impl.Substring2TypeComputer; import org.apache.asterix.om.typecomputer.impl.SubstringTypeComputer; import org.apache.asterix.om.typecomputer.impl.SwitchCaseComputer; import org.apache.asterix.om.typecomputer.impl.UnaryBinaryInt64TypeComputer; @@ -310,6 +311,10 @@ "string-concat", 1); public static final FunctionIdentifier STRING_JOIN = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "string-join", 2); + public static final FunctionIdentifier STRING_REPEAT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "repeat", 2); + public static final FunctionIdentifier STRING_SPLIT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "split", + 2); public static final FunctionIdentifier DATASET = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "dataset", 1); public static final FunctionIdentifier FEED_COLLECT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, @@ -884,10 +889,10 @@ addFunction(STRING_LIKE, BooleanFunctionTypeComputer.INSTANCE, true); addFunction(STRING_CONTAINS, ABooleanTypeComputer.INSTANCE, true); - addFunction(STRING_TO_CODEPOINT, OrderedListOfAInt64TypeComputer.INSTANCE, true); + addFunction(STRING_TO_CODEPOINT, StringToInt64ListTypeComputer.INSTANCE, true); addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true); addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true); - addFunction(SUBSTRING2, Substring2TypeComputer.INSTANCE, true); + addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE, true); addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, true); 
addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true); addFunction(STRING_UPPERCASE, StringStringTypeComputer.INSTANCE, true); @@ -913,6 +918,8 @@ addFunction(SUBSTRING_AFTER, StringStringTypeComputer.INSTANCE, true); addPrivateFunction(STRING_EQUAL, StringBooleanTypeComputer.INSTANCE, true); addFunction(STRING_JOIN, AStringTypeComputer.INSTANCE, true); + addFunction(STRING_REPEAT, StringIntToStringTypeComputer.INSTANCE, true); + addFunction(STRING_SPLIT, StringToStringListTypeComputer.INSTANCE, true); addPrivateFunction(ORDERED_LIST_CONSTRUCTOR, OrderedListConstructorTypeComputer.INSTANCE, true); addFunction(POINT_CONSTRUCTOR, APointTypeComputer.INSTANCE, true); diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java similarity index 91% rename from asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java index e2e812e..7bb83d0 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java @@ -25,8 +25,8 @@ import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; -public class Substring2TypeComputer extends AbstractResultTypeComputer { - public static final Substring2TypeComputer INSTANCE = new Substring2TypeComputer(); +public class StringIntToStringTypeComputer extends AbstractResultTypeComputer { + public static final StringIntToStringTypeComputer INSTANCE = new StringIntToStringTypeComputer(); @Override public void checkArgType(int argIndex, IAType type) 
throws AlgebricksException { diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java new file mode 100644 index 0000000..b01ac71 --- /dev/null +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.om.typecomputer.impl; + +import org.apache.asterix.om.types.AOrderedListType; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.asterix.om.types.IAType; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; + +public class StringToInt64ListTypeComputer extends AbstractStringTypeComputer { + + public static final StringToInt64ListTypeComputer INSTANCE = new StringToInt64ListTypeComputer(); + + private StringToInt64ListTypeComputer() { + } + + @Override + protected IAType getResultType(ILogicalExpression expr, IAType... 
strippedInputTypes) throws AlgebricksException { + return new AOrderedListType(BuiltinType.AINT64, null); + } +} \ No newline at end of file diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java new file mode 100644 index 0000000..4891330 --- /dev/null +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.om.typecomputer.impl; + +import org.apache.asterix.om.types.AOrderedListType; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.asterix.om.types.IAType; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; + +public class StringToStringListTypeComputer extends AbstractStringTypeComputer { + + public static final StringToStringListTypeComputer INSTANCE = new StringToStringListTypeComputer(); + + private StringToStringListTypeComputer() { + } + + @Override + protected IAType getResultType(ILogicalExpression expr, IAType... strippedInputTypes) throws AlgebricksException { + return new AOrderedListType(BuiltinType.ASTRING, null); + } +} \ No newline at end of file diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java new file mode 100644 index 0000000..0f4c0de --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.EnumDeserializer; +import org.apache.asterix.om.types.hierachy.ATypeHierarchy; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.VoidPointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; +import org.apache.hyracks.util.string.UTF8StringUtil; + +public class StringRepeatDescriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringRepeatDescriptor(); + } + }; + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) + throws AlgebricksException { + return new IScalarEvaluatorFactory() { + private static final long serialVersionUID = 
1L; + + @Override + public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws AlgebricksException { + return new IScalarEvaluator() { + // Argument evaluators. + private IScalarEvaluator evalString = args[0].createScalarEvaluator(ctx); + private IScalarEvaluator evalStart = args[1].createScalarEvaluator(ctx); + + // Argument pointers. + private IPointable argString = new VoidPointable(); + private IPointable argNumber = new VoidPointable(); + + // For outputting the result. + private ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); + private DataOutput out = resultStorage.getDataOutput(); + private byte[] tempLengthArray = new byte[5]; + + @Override + public void evaluate(IFrameTupleReference tuple, IPointable result) throws AlgebricksException { + resultStorage.reset(); + + // Calls argument evaluators. + evalStart.evaluate(tuple, argNumber); + evalString.evaluate(tuple, argString); + + // Gets the repeating times. + int repeatingTimes = 0; + byte[] bytes = argNumber.getByteArray(); + int offset = argNumber.getStartOffset(); + try { + repeatingTimes = ATypeHierarchy.getIntegerValue(bytes, offset); + } catch (HyracksDataException e1) { + throw new AlgebricksException(e1); + } + // Checks repeatingTimes. It should be a non-negative value. + if (repeatingTimes < 0) { + throw new AlgebricksException(StringRepeatDescriptor.this.getIdentifier().getName() + + ": expects a non-negative repeating number but got " + repeatingTimes + "."); + } + + // Gets the input string. + bytes = argString.getByteArray(); + offset = argString.getStartOffset(); + // Checks the type of the string argument. + if (bytes[offset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { + throw new AlgebricksException(StringRepeatDescriptor.this.getIdentifier().getName() + + ": expects type STRING for the first argument but got " + + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[offset])); + } + + // Calculates the result string length. 
+ int inputLen = UTF8StringUtil.getUTFLength(bytes, offset + 1); + int resultLen = Math.multiplyExact(inputLen, repeatingTimes); // Can throw overflow exception. + int cbytes = UTF8StringUtil.encodeUTF8Length(resultLen, tempLengthArray, 0); + + // Writes the output string. + int inputStringStart = offset + 1 + UTF8StringUtil.getNumBytesToStoreLength(inputLen); + try { + out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); + out.write(tempLengthArray, 0, cbytes); + for (int numRepeats = 0; numRepeats < repeatingTimes; ++numRepeats) { + out.write(bytes, inputStringStart, inputLen); + } + } catch (IOException e) { + throw new AlgebricksException(e); + } + result.set(resultStorage); + } + }; + } + }; + } + + @Override + public FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_REPEAT; + } + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java new file mode 100644 index 0000000..8cd3a5b --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.asterix.builders.OrderedListBuilder; +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.om.types.AOrderedListType; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.asterix.om.types.EnumDeserializer; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.primitive.VoidPointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; +import org.apache.hyracks.util.string.UTF8StringUtil; + +public class StringSplitDescriptor extends AbstractScalarFunctionDynamicDescriptor { + + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringSplitDescriptor(); + } + }; + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) { + return new 
IScalarEvaluatorFactory() { + private static final long serialVersionUID = 1L; + + @Override + public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws AlgebricksException { + return new IScalarEvaluator() { + // Argument evaluators. + private final IScalarEvaluator stringEval = args[0].createScalarEvaluator(ctx); + private final IScalarEvaluator patternEval = args[1].createScalarEvaluator(ctx); + + // Argument pointers. + private final IPointable argString = new VoidPointable(); + private final IPointable argPattern = new VoidPointable(); + private final UTF8StringPointable argStrPtr = new UTF8StringPointable(); + private final UTF8StringPointable argPatternPtr = new UTF8StringPointable(); + + // For an output string item. + private final ArrayBackedValueStorage itemStorge = new ArrayBackedValueStorage(); + private final DataOutput itemOut = itemStorge.getDataOutput(); + private final byte[] tempLengthArray = new byte[5]; + + // For the output list of strings. + private final AOrderedListType intListType = new AOrderedListType(BuiltinType.ASTRING, null); + private final OrderedListBuilder listBuilder = new OrderedListBuilder(); + private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); + private final DataOutput out = resultStorage.getDataOutput(); + + @Override + public void evaluate(IFrameTupleReference tuple, IPointable result) throws AlgebricksException { + try { + resultStorage.reset(); + // Calls argument evaluators. + stringEval.evaluate(tuple, argString); + patternEval.evaluate(tuple, argPattern); + + // Gets the bytes of the source string. + byte[] srcString = argString.getByteArray(); + int srcOffset = argString.getStartOffset(); + int srcLen = argString.getLength(); + // Type check for the first argument. 
+ if (srcString[srcOffset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { + throw new AlgebricksException(StringSplitDescriptor.this.getIdentifier().getName() + + ": expects input type STRING for the first argument but got " + + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(srcString[srcOffset])); + } + + // Gets the bytes of the pattern string. + byte[] patternString = argPattern.getByteArray(); + int patternOffset = argPattern.getStartOffset(); + int patternLen = argPattern.getLength(); + // Type check for the second argument. + if (patternString[patternOffset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { + throw new AlgebricksException(StringSplitDescriptor.this.getIdentifier().getName() + + ": expects input type STRING for the second argument but got " + + EnumDeserializer.ATYPETAGDESERIALIZER + .deserialize(patternString[patternOffset])); + } + + // Sets the UTF8 String pointables. + argStrPtr.set(srcString, srcOffset + 1, srcLen - 1); + argPatternPtr.set(patternString, patternOffset + 1, patternLen - 1); + + // Gets the string length of the source string. + int inputStringLen = UTF8StringUtil.getUTFLength(srcString, srcOffset + 1); + int inputStringStart = srcOffset + 1 + + UTF8StringUtil.getNumBytesToStoreLength(inputStringLen); + // Gets the string length of the pattern string. + int inputPatternLen = UTF8StringUtil.getUTFLength(patternString, patternOffset + 1); + // Handles the case that the pattern is "". + boolean emptyStringPattern = inputPatternLen == 0; + + // Builds a list of strings. + listBuilder.reset(intListType); + int itemStrStart = 0; + int nextMatchStart; + while (itemStrStart < inputStringLen && (nextMatchStart = UTF8StringPointable + .find(argStrPtr, argPatternPtr, false, itemStrStart)) >= 0) { + // Adds an item string. + addItemString(srcString, inputStringStart, itemStrStart, + emptyStringPattern ? nextMatchStart + 1 : nextMatchStart); + itemStrStart = nextMatchStart + (emptyStringPattern ? 
1 : inputPatternLen); + } + if (!emptyStringPattern) { + addItemString(srcString, inputStringStart, itemStrStart, inputStringLen); + } + listBuilder.write(out, true); + result.set(resultStorage); + } catch (IOException e1) { + throw new AlgebricksException(e1); + } + } + + private void addItemString(byte[] srcString, int inputStringStart, int itemStartOffset, + int nextMatchStart) throws IOException { + int itemLen = nextMatchStart - itemStartOffset; + int cbytes = UTF8StringUtil.encodeUTF8Length(itemLen, tempLengthArray, 0); + itemStorge.reset(); + itemOut.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); + itemOut.write(tempLengthArray, 0, cbytes); + if (itemLen > 0) { + itemOut.write(srcString, inputStringStart + itemStartOffset, itemLen); + } + listBuilder.addItem(itemStorge); + } + }; + } + }; + } + + @Override + public FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_SPLIT; + } + +} diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java index 8592bd2..0850b04 100644 --- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java +++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java @@ -178,23 +178,40 @@ } /** - * return the byte offset of the first character of the matching string. Not including the MetaLength - * - * @param src - * @param pattern - * @param ignoreCase - * @return + * @param src, + * the source string. + * @param pattern, + * the pattern string. + * @param ignoreCase, + * to ignore case or not. + * @return the byte offset of the first character of the matching string. Not including the MetaLength. 
*/ public static int find(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) { + return find(src, pattern, ignoreCase, 0); + } + + /** + * Finds the first occurrence of <code>pattern</code> in <code>src</code>, scanning from a given start offset. + * + * @param src + * the source string. + * @param pattern + * the pattern string. + * @param ignoreCase + * to ignore case or not. + * @param startMatch + * the byte offset in the source string (not including the MetaLength) at which the search starts. + * @return the byte offset of the first character of the first match found at or after <code>startMatch</code>, + * or -1 if there is no such match. Not including the MetaLength. + */ + public static int find(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase, int startMatch) { + int startMatchPos = startMatch; final int srcUtfLen = src.getUTF8Length(); final int pttnUtfLen = pattern.getUTF8Length(); final int srcStart = src.getMetaDataLength(); final int pttnStart = pattern.getMetaDataLength(); - int startMatch = 0; int maxStart = srcUtfLen - pttnUtfLen; - while (startMatch <= maxStart) { - int c1 = startMatch; + while (startMatchPos <= maxStart) { + int c1 = startMatchPos; int c2 = 0; while (c1 < srcUtfLen && c2 < pttnUtfLen) { char ch1 = src.charAt(srcStart + c1); @@ -209,9 +226,9 @@ c2 += pattern.charSize(pttnStart + c2); } if (c2 == pttnUtfLen) { - return startMatch; + return startMatchPos; } - startMatch += src.charSize(srcStart + startMatch); + startMatchPos += src.charSize(srcStart + startMatchPos); } return -1; } -- To view, visit https://asterix-gerrit.ics.uci.edu/1141 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ib9de5a59807d5ff51fa5d72444053f87cf8dd289 Gerrit-PatchSet: 4 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Yingyi Bu <buyin...@gmail.com> Gerrit-Reviewer: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Gerrit-Reviewer: Till Westmann <ti...@apache.org> Gerrit-Reviewer: Yingyi Bu <buyin...@gmail.com>