Yingyi Bu has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/1104
Change subject: Add several builtin functions. ...................................................................... Add several builtin functions. - Added functions include: greatest, least, concat(||), initcap(title), regexp_contains, ltrim, trim, rtrim. Change-Id: I26351af22f67d66b56176f55b29a4e7ff63583f7 --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/comparison/greatest_mixed/greatest_mixed.3.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/comparison/least_mixed/least_mixed.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_func/concat_func.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_pipe/concat_pipe.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_pipe_multi/concat_pipe.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/initcap/initcap.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/ltrim/ltrim.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_contains06/regexp_contains06.3.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/rtrim/rtrim.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/title/title.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/trim/trim.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/greatest_mixed/greatest_mixed.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/least_mixed/least_mixed.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/string/concat_pipe_multi/concat_pipe_multi.1.adm A 
asterixdb/asterix-app/src/test/resources/runtimets/results/string/initcap/initcap.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/string/ltrim/ltrim.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/string/rtrim/rtrim.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/string/trim/trim.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml M asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/struct/OperatorType.java M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/OperatorExpressionVisitor.java M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppBuiltinFunctionRewriteVisitor.java A asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppListInputFunctionRewriteVisitor.java M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java M asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringBoolEval.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringStringEval.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringInitCapDescriptor.java 
A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLTrim2Descriptor.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLTrimDescriptor.java M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLowerCaseDescriptor.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRTrim2Descriptor.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRTrimDescriptor.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringTrim2Descriptor.java A asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringTrimDescriptor.java M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringUpperCaseDescriptor.java M hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java M hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java 44 files changed, 1,605 insertions(+), 187 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/04/1104/1 diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java index ee4dc32..cd9e39d 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java @@ -205,16 +205,23 @@ import org.apache.asterix.runtime.evaluators.functions.StringContainsDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringEndsWithDescriptor; import 
org.apache.asterix.runtime.evaluators.functions.StringEqualDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringInitCapDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringJoinDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringLTrim2Descriptor; +import org.apache.asterix.runtime.evaluators.functions.StringLTrimDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringLengthDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringLikeDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringLowerCaseDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringMatchesDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringMatchesWithFlagDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringRTrim2Descriptor; +import org.apache.asterix.runtime.evaluators.functions.StringRTrimDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringReplaceDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringReplaceWithFlagsDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringStartsWithDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringToCodePointDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringTrim2Descriptor; +import org.apache.asterix.runtime.evaluators.functions.StringTrimDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringUpperCaseDescriptor; import org.apache.asterix.runtime.evaluators.functions.Substring2Descriptor; import org.apache.asterix.runtime.evaluators.functions.SubstringAfterDescriptor; @@ -491,6 +498,13 @@ functionsToInjectUnkownHandling.add(StringConcatDescriptor.FACTORY); functionsToInjectUnkownHandling.add(StringJoinDescriptor.FACTORY); functionsToInjectUnkownHandling.add(RegExpDescriptor.FACTORY); + functionsToInjectUnkownHandling.add(StringInitCapDescriptor.FACTORY); + 
functionsToInjectUnkownHandling.add(StringTrimDescriptor.FACTORY); + functionsToInjectUnkownHandling.add(StringLTrimDescriptor.FACTORY); + functionsToInjectUnkownHandling.add(StringRTrimDescriptor.FACTORY); + functionsToInjectUnkownHandling.add(StringTrim2Descriptor.FACTORY); + functionsToInjectUnkownHandling.add(StringLTrim2Descriptor.FACTORY); + functionsToInjectUnkownHandling.add(StringRTrim2Descriptor.FACTORY); // Constructors functionsToInjectUnkownHandling.add(ABooleanConstructorDescriptor.FACTORY); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/comparison/greatest_mixed/greatest_mixed.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/comparison/greatest_mixed/greatest_mixed.3.query.sqlpp new file mode 100644 index 0000000..9056b4b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/comparison/greatest_mixed/greatest_mixed.3.query.sqlpp @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +SELECT VALUE greatest(1.0, 3.0, -2, 10.0, 2, null, missing); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/comparison/least_mixed/least_mixed.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/comparison/least_mixed/least_mixed.1.query.sqlpp new file mode 100644 index 0000000..948d146 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/comparison/least_mixed/least_mixed.1.query.sqlpp @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +SELECT VALUE LEAST(1.0, 3.0, -2, 10.0, 2, null, missing); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_func/concat_func.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_func/concat_func.1.query.sqlpp new file mode 100644 index 0000000..46f2a65 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_func/concat_func.1.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +with k as [{'a':1,'b':'hello'},{'a':2,'b':{'k':[1,2,2]}}] + +select value concat(x.b, " world") +from k as x +where x.a = 1 +; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_pipe/concat_pipe.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_pipe/concat_pipe.1.query.sqlpp new file mode 100644 index 0000000..16bef11 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_pipe/concat_pipe.1.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +with k as [{'a':1,'b':'hello'},{'a':2,'b':{'k':[1,2,2]}}] + +select value x.b || " world" +from k as x +where x.a = 1 +; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_pipe_multi/concat_pipe.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_pipe_multi/concat_pipe.1.query.sqlpp new file mode 100644 index 0000000..3f06395 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/concat_pipe_multi/concat_pipe.1.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +with k as [{'a':1,'b':'hello'},{'a':2,'b':{'k':[1,2,2]}}] + +select value x.b || " new" || " world" +from k as x +where x.a = 1 +; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/initcap/initcap.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/initcap/initcap.1.query.sqlpp new file mode 100644 index 0000000..35580ce --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/initcap/initcap.1.query.sqlpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +[ + initcap(""), + initcap("very large data base--a conference"), + initcap("very large data base?a conference"), + initcap(null), + initcap(missing) +]; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/ltrim/ltrim.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/ltrim/ltrim.1.query.sqlpp new file mode 100644 index 0000000..bc062e5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/ltrim/ltrim.1.query.sqlpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +[ + ltrim(""), + ltrim(" "), + ltrim(" abc "), + ltrim("abcd"), + ltrim(null), + ltrim(missing), + ltrim("", "ad "), + ltrim(" ", "ad "), + ltrim(" abc ", "ad "), + ltrim("abcd", "ad "), + ltrim("abc", null), + ltrim(null, "abc"), + ltrim(null, null), + ltrim("abc", missing), + ltrim(missing, "abc"), + ltrim(missing, missing) +]; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_contains06/regexp_contains06.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_contains06/regexp_contains06.3.query.sqlpp new file mode 100644 index 0000000..4fed528 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_contains06/regexp_contains06.3.query.sqlpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +select element a +from [ + matches('mnop','.'), + matches('abcdefABCDEF','/d'), + matches('12345','\\d'), + matches('abcdefGHIJK','\\D'), + matches(' ','\\s'), + matches(' ','\\S'), + matches('Welcome to pattern matching!','[a-zA-Z_0-9]'), + matches('!@#$%^&*()','[a-zA-Z_0-9]'), + matches('!@#$%^&*()','[^\\W]'), + matches('!@#$%^&*','[^\\w]'), + matches('0xffff','[\\p{XDigit}]'), + matches('FFFFFFFF','[\\p{XDigit}]'), + matches('abcdefgh','[\\p{javaLowerCase}]'), + matches('ABCDEF','[\\p{javaLowerCase}]'), + matches(`codepoint-to-string`([163]),'[\\p{Sc}]') + ] as a +; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/rtrim/rtrim.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/rtrim/rtrim.1.query.sqlpp new file mode 100644 index 0000000..64a050b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/rtrim/rtrim.1.query.sqlpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +[ + rtrim(""), + rtrim(" "), + rtrim(" abc "), + rtrim("abcd"), + rtrim(null), + rtrim(missing), + rtrim("", "ad "), + rtrim(" ", "ad "), + rtrim(" abc ", "ad "), + rtrim("abcd", "ad "), + rtrim("abc", null), + rtrim(null, "abc"), + rtrim(null, null), + rtrim("abc", missing), + rtrim(missing, "abc"), + rtrim(missing, missing) +]; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/title/title.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/title/title.1.query.sqlpp new file mode 100644 index 0000000..b12c475 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/title/title.1.query.sqlpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +[ + title(""), + title("very large data base--a conference"), + title("very large data base?a conference"), + title(null), + title(missing) +]; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/trim/trim.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/trim/trim.1.query.sqlpp new file mode 100644 index 0000000..61dc619 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/trim/trim.1.query.sqlpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +[ + trim(""), + trim(" "), + trim(" abc "), + trim("abcd"), + trim(null), + trim(missing), + trim("", "ad "), + trim(" ", "ad "), + trim(" abc ", "ad "), + trim("abcd", "ad "), + trim("abc", null), + trim(null, "abc"), + trim(null, null), + trim("abc", missing), + trim(missing, "abc"), + trim(missing, missing) +]; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/greatest_mixed/greatest_mixed.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/greatest_mixed/greatest_mixed.1.adm new file mode 100644 index 0000000..2f52450 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/greatest_mixed/greatest_mixed.1.adm @@ -0,0 +1 @@ +10.0 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/least_mixed/least_mixed.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/least_mixed/least_mixed.1.adm new file mode 100644 index 0000000..134d589 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/least_mixed/least_mixed.1.adm @@ -0,0 +1 @@ +-2.0 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/concat_pipe_multi/concat_pipe_multi.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/concat_pipe_multi/concat_pipe_multi.1.adm new file mode 100644 index 0000000..90ca27a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/concat_pipe_multi/concat_pipe_multi.1.adm @@ -0,0 +1 @@ +"hello new world" diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/initcap/initcap.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/initcap/initcap.1.adm new file mode 100644 index 0000000..5f6a8de --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/initcap/initcap.1.adm @@ -0,0 +1 @@ +[ "", "Very Large Data Base--A Conference", "Very Large Data 
Base?A Conference", null, null ] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/ltrim/ltrim.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/ltrim/ltrim.1.adm new file mode 100644 index 0000000..d336557 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/ltrim/ltrim.1.adm @@ -0,0 +1 @@ +[ "", "", "abc ", "abcd", null, null, "", "", "bc ", "bcd", null, null, null, null, null, null ] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/rtrim/rtrim.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/rtrim/rtrim.1.adm new file mode 100644 index 0000000..bcaee0b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/rtrim/rtrim.1.adm @@ -0,0 +1 @@ +[ "", "", " abc", "abcd", null, null, "", "", " abc", "abc", null, null, null, null, null, null ] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/trim/trim.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/trim/trim.1.adm new file mode 100644 index 0000000..410cb00 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/trim/trim.1.adm @@ -0,0 +1 @@ +[ "", "", "abc", "abcd", null, null, "", "", "bc", "bc", null, null, null, null, null, null ] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 9548259..3f3bcc2 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -1146,6 +1146,16 @@ <output-dir compare="Text">binary_null</output-dir> </compilation-unit> </test-case> + <test-case FilePath="comparison"> + <compilation-unit name="greatest_mixed"> + <output-dir compare="Text">greatest_mixed</output-dir> + </compilation-unit> + </test-case> + <test-case 
FilePath="comparison"> + <compilation-unit name="least_mixed"> + <output-dir compare="Text">least_mixed</output-dir> + </compilation-unit> + </test-case> </test-group> <test-group name="constructor"> <test-case FilePath="constructor"> @@ -5073,6 +5083,21 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="concat_pipe"> + <output-dir compare="Text">concat_03</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="concat_pipe_multi"> + <output-dir compare="Text">concat_pipe_multi</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="concat_func"> + <output-dir compare="Text">concat_03</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="constructor"> <output-dir compare="Text">constructor</output-dir> </compilation-unit> @@ -5148,6 +5173,11 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="initcap"> + <output-dir compare="Text">initcap</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="length_01"> <output-dir compare="Text">length_01</output-dir> </compilation-unit> @@ -5170,6 +5200,11 @@ <test-case FilePath="string"> <compilation-unit name="lowercase"> <output-dir compare="Text">lowercase</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="ltrim"> + <output-dir compare="Text">ltrim</output-dir> </compilation-unit> </test-case> <test-case FilePath="string"> @@ -5238,6 +5273,11 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="regexp_contains06"> + <output-dir compare="Text">matches06</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="replace1"> <output-dir compare="Text">replace1</output-dir> </compilation-unit> @@ -5260,6 
+5300,11 @@ <test-case FilePath="string"> <compilation-unit name="replace3"> <output-dir compare="Text">replace3</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="rtrim"> + <output-dir compare="Text">rtrim</output-dir> </compilation-unit> </test-case> <test-case FilePath="string"> @@ -5465,6 +5510,11 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="title"> + <output-dir compare="Text">initcap</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="toLowerCase02"> <output-dir compare="Text">toLowerCase02</output-dir> </compilation-unit> @@ -5480,6 +5530,11 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="trim"> + <output-dir compare="Text">trim</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="uppercase"> <output-dir compare="Text">uppercase</output-dir> </compilation-unit> diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/struct/OperatorType.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/struct/OperatorType.java index 6e2e3e2..a5a1bb8 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/struct/OperatorType.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/struct/OperatorType.java @@ -32,6 +32,7 @@ NEQ("!="), PLUS("+"), MINUS("-"), + CONCAT("||"), MUL("*"), DIV("/"), // float/double // divide diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java index 7a26c6b..cf88961 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java +++ 
b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java @@ -55,6 +55,9 @@ // Rewrites like/not-like expressions. rewriteOperatorExpression(); + // Rewrites several variable-arg functions into their corresponding internal list-input functions. + rewriteListInputFunctions(); + // Generates ids for variables (considering scopes) but DOES NOT replace unbounded variable access with the dataset function. // An unbounded variable within a function could be a bounded variable in the top-level query. variableCheckAndRewrite(false); diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java index 1ce5de7..dd79969 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java @@ -59,6 +59,7 @@ import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppGlobalAggregationSugarVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppGroupByVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppInlineUdfsVisitor; +import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppListInputFunctionRewriteVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SubstituteGroupbyExpressionWithVariableVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.VariableCheckAndRewriteVisitor; import org.apache.asterix.lang.sqlpp.struct.SetOperationRight; @@ -122,6 +123,9 @@ // Generate ids for variables (considering scopes) and replace global variable access with the dataset function. variableCheckAndRewrite(true); + // Rewrites several variable-arg functions into their corresponding internal list-input functions. + rewriteListInputFunctions(); + // Inlines functions. 
inlineDeclaredUdfs(); @@ -154,6 +158,14 @@ globalAggregationVisitor.visit(topExpr, null); } + protected void rewriteListInputFunctions() throws AsterixException { + if (topExpr == null) { + return; + } + SqlppListInputFunctionRewriteVisitor listInputFunctionVisitor = new SqlppListInputFunctionRewriteVisitor(); + listInputFunctionVisitor.visit(topExpr, null); + } + protected void rewriteFunctionNames() throws AsterixException { if (topExpr == null) { return; diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/OperatorExpressionVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/OperatorExpressionVisitor.java index 815e020..b68962f 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/OperatorExpressionVisitor.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/OperatorExpressionVisitor.java @@ -30,8 +30,8 @@ import org.apache.asterix.lang.common.expression.CallExpr; import org.apache.asterix.lang.common.expression.OperatorExpr; import org.apache.asterix.lang.common.expression.QuantifiedExpression; -import org.apache.asterix.lang.common.expression.QuantifiedExpression.Quantifier; import org.apache.asterix.lang.common.expression.VariableExpr; +import org.apache.asterix.lang.common.expression.QuantifiedExpression.Quantifier; import org.apache.asterix.lang.common.rewrites.LangRewritingContext; import org.apache.asterix.lang.common.struct.OperatorType; import org.apache.asterix.lang.common.struct.QuantifiedPair; @@ -39,30 +39,31 @@ public class OperatorExpressionVisitor extends AbstractSqlppExpressionScopingVisitor { + private static final String CONCAT = "concat"; + public OperatorExpressionVisitor(LangRewritingContext context) { super(context); } @Override public Expression visit(OperatorExpr operatorExpr, ILangExpression arg) throws AsterixException { - List<Expression> 
newExprList = new ArrayList<>(); for (Expression expr : operatorExpr.getExprList()) { newExprList.add(expr.accept(this, operatorExpr)); } operatorExpr.setExprList(newExprList); - if (operatorExpr.getExprList().size() == 2) { - OperatorType opType = operatorExpr.getOpList().get(0); - switch (opType) { - case LIKE: - case NOT_LIKE: - return processLikeOperator(operatorExpr, opType); - case IN: - case NOT_IN: - return processInOperator(operatorExpr, opType); - default: - break; - } + OperatorType opType = operatorExpr.getOpList().get(0); + switch (opType) { + case LIKE: + case NOT_LIKE: + return processLikeOperator(operatorExpr, opType); + case IN: + case NOT_IN: + return processInOperator(operatorExpr, opType); + case CONCAT: + return processConcatOperator(operatorExpr); + default: + break; } return operatorExpr; } @@ -73,7 +74,7 @@ return likeExpr; } return new CallExpr(new FunctionSignature(null, "not", 1), - new ArrayList<Expression>(Collections.singletonList(likeExpr))); + new ArrayList<>(Collections.singletonList(likeExpr))); } private Expression processInOperator(OperatorExpr operatorExpr, OperatorType opType) throws AsterixException { @@ -86,13 +87,17 @@ comparison.setCurrentop(true); if (opType == OperatorType.IN) { comparison.addOperator("="); - return new QuantifiedExpression(Quantifier.SOME, new ArrayList<QuantifiedPair>( + return new QuantifiedExpression(Quantifier.SOME, new ArrayList<>( Collections.singletonList(new QuantifiedPair(bindingVar, collectionExpr))), comparison); } else { comparison.addOperator("!="); - return new QuantifiedExpression(Quantifier.EVERY, new ArrayList<QuantifiedPair>( + return new QuantifiedExpression(Quantifier.EVERY, new ArrayList<>( Collections.singletonList(new QuantifiedPair(bindingVar, collectionExpr))), comparison); } } + private Expression processConcatOperator(OperatorExpr operatorExpr) { + return new CallExpr(new FunctionSignature(null, CONCAT, 1), operatorExpr.getExprList()); + } + } diff --git 
a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppBuiltinFunctionRewriteVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppBuiltinFunctionRewriteVisitor.java index fbf08ee..1f4e1f4 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppBuiltinFunctionRewriteVisitor.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppBuiltinFunctionRewriteVisitor.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.lang.sqlpp.rewrites.visitor; +import static org.apache.asterix.lang.sqlpp.util.FunctionMapUtil.normalizedListInputFunctions; + import java.util.ArrayList; import java.util.List; diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppListInputFunctionRewriteVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppListInputFunctionRewriteVisitor.java new file mode 100644 index 0000000..1c4135e --- /dev/null +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppListInputFunctionRewriteVisitor.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.lang.sqlpp.rewrites.visitor; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.asterix.common.exceptions.AsterixException; +import org.apache.asterix.lang.common.base.Expression; +import org.apache.asterix.lang.common.base.ILangExpression; +import org.apache.asterix.lang.common.expression.CallExpr; +import org.apache.asterix.lang.sqlpp.util.FunctionMapUtil; +import org.apache.asterix.lang.sqlpp.visitor.base.AbstractSqlppSimpleExpressionVisitor; + +/** + * This visitor rewrites several variable-arg user-facing functions to their corresponding + * AsterixDB internal functions that take a list as the input. + */ +public class SqlppListInputFunctionRewriteVisitor extends AbstractSqlppSimpleExpressionVisitor { + + @Override + public Expression visit(CallExpr callExpr, ILangExpression arg) throws AsterixException { + List<Expression> newExprList = new ArrayList<>(); + for (Expression expr : callExpr.getExprList()) { + newExprList.add(expr.accept(this, arg)); + } + callExpr.setExprList(newExprList); + return FunctionMapUtil.normalizedListInputFunctions(callExpr); + } + +} diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java index 6d19995..8fcb7b3 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java @@ -18,12 +18,16 @@ */ package org.apache.asterix.lang.sqlpp.util; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import org.apache.asterix.common.exceptions.AsterixException; import 
org.apache.asterix.common.functions.FunctionConstants; import org.apache.asterix.common.functions.FunctionSignature; +import org.apache.asterix.lang.common.expression.CallExpr; +import org.apache.asterix.lang.common.expression.ListConstructor; import org.apache.asterix.lang.common.util.FunctionUtil; import org.apache.asterix.om.functions.AsterixBuiltinFunctions; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; @@ -43,12 +47,25 @@ FUNCTION_NAME_MAP.put("lower", "lowercase"); // SQL: lower, AQL: lowercase FUNCTION_NAME_MAP.put("substr", "substring"); // SQL: substr, AQL: substring FUNCTION_NAME_MAP.put("upper", "uppercase"); //SQL: upper, AQL: uppercase + FUNCTION_NAME_MAP.put("title", "initcap"); //SQL: title, SQL: initcap + FUNCTION_NAME_MAP.put("regexp_contains", "matches"); //SQL: regexp_contains, AQL: matches + FUNCTION_NAME_MAP.put("regexp_replace", "replace"); //SQL: regexp_replace, AQL: replace + } + + // Maps a variable-arg SQL function name to an internal list-arg function name. + private static final Map<String, String> LIST_INPUT_FUNCTION_MAP = new HashMap<>(); + + static { + LIST_INPUT_FUNCTION_MAP.put("concat", "string-concat"); + LIST_INPUT_FUNCTION_MAP.put("greatest", CORE_AGGREGATE_PREFIX + SQL_PREFIX + "max"); + LIST_INPUT_FUNCTION_MAP.put("least", CORE_AGGREGATE_PREFIX + SQL_PREFIX + "min"); } /** * Whether a function signature is a SQL-92 core aggregate function. * - * @param fs, + * @param signature + * , * the function signature. * @return true if the function signature is a SQL-92 core aggregate, * false otherwise. @@ -120,6 +137,25 @@ } /** + * Rewrites a variable-arg, user-surface function call into an internal, list-arg function. + * + * @param callExpr + * The input call expression. + * @return the same call expression, rewritten in place to call the corresponding AsterixDB internal function if its name is mapped, or unchanged otherwise. 
+ */ + public static CallExpr normalizedListInputFunctions(CallExpr callExpr) { + FunctionSignature fs = callExpr.getFunctionSignature(); + String internalFuncName = LIST_INPUT_FUNCTION_MAP.get(fs.getName().toLowerCase()); + if (internalFuncName == null) { + return callExpr; + } + callExpr.setFunctionSignature(new FunctionSignature(FunctionConstants.ASTERIX_NS, internalFuncName, 1)); + callExpr.setExprList(new ArrayList<>(Collections.singletonList(new ListConstructor( + ListConstructor.Type.ORDERED_LIST_CONSTRUCTOR, callExpr.getExprList())))); + return callExpr; + } + + /** * Removes the "coll_" prefix for user-facing SQL++ core aggregate function names. * * @param name, diff --git a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj index 2378060..6cdd396 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj +++ b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj @@ -1842,8 +1842,8 @@ { operand = MultExpr() ( - LOOKAHEAD(1) - (<PLUS> | <MINUS>) + LOOKAHEAD(1) + (<PLUS> | <MINUS> | <CONCAT>) { if (op == null) { op = new OperatorExpr(); @@ -2302,7 +2302,7 @@ List<Expression> whenExprs = new ArrayList<Expression>(); List<Expression> thenExprs = new ArrayList<Expression>(); Expression elseExpr = null; - + Expression whenExpr = null; Expression thenExpr = null; } @@ -3005,6 +3005,7 @@ TOKEN : { <CARET : "^"> + | <CONCAT : "||"> | <DIV : "/"> | <IDIV : "idiv"> | <MINUS : "-"> diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java index d17be1a..e276fa2 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java @@ -255,6 +255,20 @@ "lowercase", 1); public static final FunctionIdentifier 
STRING_UPPERCASE = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "uppercase", 1); + public static final FunctionIdentifier STRING_INITCAP = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "initcap", 1); + public static final FunctionIdentifier STRING_TRIM = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "trim", + 1); + public static final FunctionIdentifier STRING_LTRIM = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "ltrim", + 1); + public static final FunctionIdentifier STRING_RTRIM = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "rtrim", + 1); + public static final FunctionIdentifier STRING_TRIM2 = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "trim", + 2); + public static final FunctionIdentifier STRING_LTRIM2 = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "ltrim", 2); + public static final FunctionIdentifier STRING_RTRIM2 = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "rtrim", 2); public static final FunctionIdentifier STRING_REPLACE = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "replace", 3); public static final FunctionIdentifier STRING_REPLACE_WITH_FLAG = new FunctionIdentifier( @@ -868,6 +882,14 @@ addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, true); addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true); addFunction(STRING_UPPERCASE, StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_INITCAP, StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_STARTS_WITH, StringBooleanTypeComputer.INSTANCE, true); + addFunction(STRING_TRIM, StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_LTRIM, StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_RTRIM, StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_TRIM2, StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_LTRIM2, StringStringTypeComputer.INSTANCE, true); + addFunction(STRING_RTRIM2, StringStringTypeComputer.INSTANCE, true); 
addFunction(STRING_STARTS_WITH, StringBooleanTypeComputer.INSTANCE, true); addFunction(STRING_ENDS_WITH, StringBooleanTypeComputer.INSTANCE, true); addFunction(STRING_MATCHES, StringBooleanTypeComputer.INSTANCE, true); diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringBoolEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringBoolEval.java index fd89493..ae6fd52 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringBoolEval.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringBoolEval.java @@ -18,75 +18,33 @@ */ package org.apache.asterix.runtime.evaluators.functions; -import java.io.DataOutput; - import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider; import org.apache.asterix.om.base.ABoolean; -import org.apache.asterix.om.types.ATypeTag; import org.apache.asterix.om.types.BuiltinType; -import org.apache.asterix.om.types.EnumDeserializer; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; -import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.primitive.UTF8StringPointable; -import org.apache.hyracks.data.std.primitive.VoidPointable; -import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; -import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; -public abstract class 
AbstractBinaryStringBoolEval implements IScalarEvaluator { +public abstract class AbstractBinaryStringBoolEval extends AbstractBinaryStringEval { - private ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); - private DataOutput dout = resultStorage.getDataOutput(); - - private IPointable ptr0 = new VoidPointable(); - private IPointable ptr1 = new VoidPointable(); - private IScalarEvaluator evalLeft; - private IScalarEvaluator evalRight; - private final FunctionIdentifier funcID; - - private final UTF8StringPointable leftPtr = new UTF8StringPointable(); - private final UTF8StringPointable rightPtr = new UTF8StringPointable(); - + // For outputting results. @SuppressWarnings({ "rawtypes" }) private ISerializerDeserializer boolSerde = AqlSerializerDeserializerProvider.INSTANCE .getSerializerDeserializer(BuiltinType.ABOOLEAN); public AbstractBinaryStringBoolEval(IHyracksTaskContext context, IScalarEvaluatorFactory evalLeftFactory, IScalarEvaluatorFactory evalRightFactory, FunctionIdentifier funcID) throws AlgebricksException { - this.evalLeft = evalLeftFactory.createScalarEvaluator(context); - this.evalRight = evalRightFactory.createScalarEvaluator(context); - this.funcID = funcID; + super(context, evalLeftFactory, evalRightFactory, funcID); } - @SuppressWarnings("unchecked") @Override - public void evaluate(IFrameTupleReference tuple, IPointable result) throws AlgebricksException { - evalLeft.evaluate(tuple, ptr0); - evalRight.evaluate(tuple, ptr1); - - byte[] bytes0 = ptr0.getByteArray(); - int offset0 = ptr0.getStartOffset(); - int len0 = ptr0.getLength(); - byte[] bytes1 = ptr1.getByteArray(); - int offset1 = ptr1.getStartOffset(); - int len1 = ptr1.getLength(); - - resultStorage.reset(); - if (bytes0[offset0] != ATypeTag.SERIALIZED_STRING_TYPE_TAG - || bytes1[offset1] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { - throw new AlgebricksException(funcID.getName() + ": expects input type STRING or NULL, but got " - + 
EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes0[offset0]) + " and " - + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes1[offset1]) + ")!"); - } - - leftPtr.set(bytes0, offset0 + 1, len0 - 1); - rightPtr.set(bytes1, offset1 + 1, len1 - 1); - + public void process(UTF8StringPointable leftPtr, UTF8StringPointable rightPtr, IPointable result) + throws AlgebricksException { ABoolean res = compute(leftPtr, rightPtr) ? ABoolean.TRUE : ABoolean.FALSE; try { boolSerde.serialize(res, dout); @@ -96,6 +54,7 @@ result.set(resultStorage); } + // Computes a boolean value from two input strings. protected abstract boolean compute(UTF8StringPointable left, UTF8StringPointable right) throws AlgebricksException; } diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java new file mode 100644 index 0000000..73c2237 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.DataOutput; + +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.EnumDeserializer; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.primitive.VoidPointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; + +public abstract class AbstractBinaryStringEval implements IScalarEvaluator { + + // Argument evaluators. + private IScalarEvaluator evalLeft; + private IScalarEvaluator evalRight; + + // Argument pointables. + private IPointable ptr0 = new VoidPointable(); + private IPointable ptr1 = new VoidPointable(); + private final UTF8StringPointable leftPtr = new UTF8StringPointable(); + private final UTF8StringPointable rightPtr = new UTF8StringPointable(); + + // For results. + protected ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); + protected DataOutput dout = resultStorage.getDataOutput(); + + // Function ID, for error reporting. 
+ private final FunctionIdentifier funcID; + + public AbstractBinaryStringEval(IHyracksTaskContext context, IScalarEvaluatorFactory evalLeftFactory, + IScalarEvaluatorFactory evalRightFactory, FunctionIdentifier funcID) throws AlgebricksException { + this.evalLeft = evalLeftFactory.createScalarEvaluator(context); + this.evalRight = evalRightFactory.createScalarEvaluator(context); + this.funcID = funcID; + } + + @SuppressWarnings("unchecked") + @Override + public void evaluate(IFrameTupleReference tuple, IPointable resultPointable) throws AlgebricksException { + resultStorage.reset(); + + // Gets the first argument. + evalLeft.evaluate(tuple, ptr0); + byte[] bytes0 = ptr0.getByteArray(); + int offset0 = ptr0.getStartOffset(); + int len0 = ptr0.getLength(); + + // Gets the second argument. + evalRight.evaluate(tuple, ptr1); + byte[] bytes1 = ptr1.getByteArray(); + int offset1 = ptr1.getStartOffset(); + int len1 = ptr1.getLength(); + + // Type check. + if (bytes0[offset0] != ATypeTag.SERIALIZED_STRING_TYPE_TAG + || bytes1[offset1] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { + throw new AlgebricksException(funcID.getName() + ": expects input type STRING, but got " + + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes0[offset0]) + " and " + + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes1[offset1]) + ")!"); + } + + // Sets StringUTF8Pointables. + leftPtr.set(bytes0, offset0 + 1, len0 - 1); + rightPtr.set(bytes1, offset1 + 1, len1 - 1); + + // The actual processing. + process(leftPtr, rightPtr, resultPointable); + } + + /** + * The actual processing of a string function. + * + * @param left + * , the first argument. + * @param right + * , the second argument. + * @param resultPointable + * , the result. 
+ * @throws AlgebricksException + */ + protected abstract void process(UTF8StringPointable left, UTF8StringPointable right, IPointable resultPointable) + throws AlgebricksException; + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringStringEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringStringEval.java new file mode 100644 index 0000000..ee1559a --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringStringEval.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.om.types.ATypeTag; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.util.GrowableArray; +import org.apache.hyracks.data.std.util.UTF8StringBuilder; + +public abstract class AbstractBinaryStringStringEval extends AbstractBinaryStringEval { + + // For outputting results. + protected final UTF8StringPointable resultStrPtr = new UTF8StringPointable(); + protected final GrowableArray resultArray = new GrowableArray(); + protected final UTF8StringBuilder resultBuilder = new UTF8StringBuilder(); + + public AbstractBinaryStringStringEval(IHyracksTaskContext context, IScalarEvaluatorFactory evalLeftFactory, + IScalarEvaluatorFactory evalRightFactory, FunctionIdentifier funcID) throws AlgebricksException { + super(context, evalLeftFactory, evalRightFactory, funcID); + } + + @Override + public void process(UTF8StringPointable leftPtr, UTF8StringPointable rightPtr, IPointable resultPointable) + throws AlgebricksException { + resultArray.reset(); + compute(leftPtr, rightPtr, resultStrPtr); + writeResult(resultPointable); + } + + // Computes a string result from two input strings and stores it in resultStrPtr. + protected abstract void compute(UTF8StringPointable left, UTF8StringPointable right, + UTF8StringPointable resultStrPtr) throws AlgebricksException; + + // Writes the result. 
+ void writeResult(IPointable resultPointable) throws AlgebricksException { + try { + dout.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); + dout.write(resultStrPtr.getByteArray(), 0, resultStrPtr.getLength()); + resultPointable.set(resultStorage); + } catch (IOException e) { + throw new AlgebricksException(e); + } + } +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java new file mode 100644 index 0000000..8c01472 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractUnaryStringStringEval.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.EnumDeserializer; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.primitive.VoidPointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.apache.hyracks.data.std.util.GrowableArray; +import org.apache.hyracks.data.std.util.UTF8StringBuilder; +import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; + +abstract class AbstractUnaryStringStringEval implements IScalarEvaluator { + + // For the argument. + private final IScalarEvaluator argEval; + private final VoidPointable argPtr = new VoidPointable(); + private final UTF8StringPointable stringPtr = new UTF8StringPointable(); + + // For writing results. 
+ final GrowableArray resultArray = new GrowableArray(); + final UTF8StringBuilder resultBuilder = new UTF8StringBuilder(); + private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); + private final DataOutput dataOutput = resultStorage.getDataOutput(); + private final FunctionIdentifier funcID; + + AbstractUnaryStringStringEval(IHyracksTaskContext context, IScalarEvaluatorFactory argEvalFactory, + FunctionIdentifier funcID) throws AlgebricksException { + this.argEval = argEvalFactory.createScalarEvaluator(context); + this.funcID = funcID; + } + + @SuppressWarnings("unchecked") + @Override + public void evaluate(IFrameTupleReference tuple, IPointable resultPointable) throws AlgebricksException { + resultStorage.reset(); + argEval.evaluate(tuple, argPtr); + byte[] argBytes = argPtr.getByteArray(); + int offset = argPtr.getStartOffset(); + byte inputTypeTag = argBytes[offset]; + if (inputTypeTag != ATypeTag.SERIALIZED_STRING_TYPE_TAG) { + throw new AlgebricksException(funcID.getName() + ": expects input type to be STRING, but got (" + + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(inputTypeTag) + "."); + } + stringPtr.set(argBytes, offset + 1, argPtr.getLength() - 1); + resultArray.reset(); + process(stringPtr, resultPointable); + writeResult(resultPointable); + } + + // Processes an input UTF8 string. + abstract void process(UTF8StringPointable inputString, IPointable resultPointable) + throws AlgebricksException; + + // Writes the result. 
+ void writeResult(IPointable resultPointable) throws AlgebricksException { + try { + dataOutput.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); + dataOutput.write(resultArray.getByteArray(), 0, resultArray.getLength()); + resultPointable.set(resultStorage); + } catch (IOException e) { + throw new AlgebricksException(e); + } + } +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringInitCapDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringInitCapDescriptor.java new file mode 100644 index 0000000..6dc1a39 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringInitCapDescriptor.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; + +public class StringInitCapDescriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringInitCapDescriptor(); + } + }; + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(IScalarEvaluatorFactory[] args) { + return new IScalarEvaluatorFactory() { + private static final long serialVersionUID = 1L; + + @Override + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new InitCapEvaluator(ctx, args[0]); + } + }; + } + + @Override + public FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_INITCAP; + } + + class InitCapEvaluator extends AbstractUnaryStringStringEval { + + InitCapEvaluator(IHyracksTaskContext context, IScalarEvaluatorFactory argEvalFactory) + throws AlgebricksException { + super(context, argEvalFactory, StringInitCapDescriptor.this.getIdentifier()); + } + + // Processes an 
input UTF8 string. + protected void process(UTF8StringPointable inputString, IPointable resultPointable) throws AlgebricksException { + try { + inputString.initCap(resultBuilder, resultArray); + } catch (IOException e) { + throw new AlgebricksException(e); + } + } + + } + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLTrim2Descriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLTrim2Descriptor.java new file mode 100644 index 0000000..a204b5b --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLTrim2Descriptor.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.util.GrowableArray; +import org.apache.hyracks.data.std.util.UTF8StringBuilder; + +public class StringLTrim2Descriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringLTrim2Descriptor(); + } + }; + + @Override + public FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_LTRIM2; + } + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) + throws AlgebricksException { + return new IScalarEvaluatorFactory() { + + @Override + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new StringLTrimEvaluator(ctx, args[0], args[1]); + } + }; + } + + class StringLTrimEvaluator extends StringTrim2Descriptor.StringTrim2Evaluator { + + public StringLTrimEvaluator(IHyracksTaskContext context, IScalarEvaluatorFactory evalLeftFactory, + IScalarEvaluatorFactory evalRightFactory) throws 
AlgebricksException { + super(context, evalLeftFactory, evalRightFactory, StringLTrim2Descriptor.this.getIdentifier()); + } + + @Override + protected void trim(UTF8StringPointable srcPtr, UTF8StringBuilder builder, GrowableArray out) + throws IOException { + UTF8StringPointable.trim(srcPtr, builder, out, true, false, charSet); + } + } + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLTrimDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLTrimDescriptor.java new file mode 100644 index 0000000..45be85d --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLTrimDescriptor.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; + +public class StringLTrimDescriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringLTrimDescriptor(); + } + }; + + @Override + public FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_LTRIM; + } + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) + throws AlgebricksException { + return new IScalarEvaluatorFactory() { + + @Override + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new StringTrimEvaluator(ctx, args[0], StringLTrimDescriptor.this.getIdentifier()); + } + }; + } + + static class StringTrimEvaluator extends StringTrimDescriptor.StringTrimEvaluator { + + public StringTrimEvaluator(IHyracksTaskContext context, IScalarEvaluatorFactory evalFactory, + FunctionIdentifier funcId) throws AlgebricksException { + super(context, 
evalFactory, funcId); + } + + @Override + protected void process(UTF8StringPointable srcPtr, IPointable resultPointable) throws AlgebricksException { + try { + UTF8StringPointable.trim(srcPtr, resultBuilder, resultArray, true, false, charSet); + } catch (IOException e) { + throw new AlgebricksException(e); + } + } + } + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLowerCaseDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLowerCaseDescriptor.java index 341a26a..552fd70 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLowerCaseDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringLowerCaseDescriptor.java @@ -18,14 +18,11 @@ */ package org.apache.asterix.runtime.evaluators.functions; -import java.io.DataOutput; import java.io.IOException; import org.apache.asterix.om.functions.AsterixBuiltinFunctions; import org.apache.asterix.om.functions.IFunctionDescriptor; import org.apache.asterix.om.functions.IFunctionDescriptorFactory; -import org.apache.asterix.om.types.ATypeTag; -import org.apache.asterix.om.types.EnumDeserializer; import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; @@ -34,11 +31,6 @@ import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.primitive.UTF8StringPointable; -import org.apache.hyracks.data.std.primitive.VoidPointable; -import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; -import org.apache.hyracks.data.std.util.GrowableArray; -import org.apache.hyracks.data.std.util.UTF8StringBuilder; -import 
org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; public class StringLowerCaseDescriptor extends AbstractScalarFunctionDynamicDescriptor { private static final long serialVersionUID = 1L; @@ -51,51 +43,13 @@ }; @Override - public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) { + public IScalarEvaluatorFactory createEvaluatorFactory(IScalarEvaluatorFactory[] args) { return new IScalarEvaluatorFactory() { private static final long serialVersionUID = 1L; @Override - public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws AlgebricksException { - return new IScalarEvaluator() { - - private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); - private final DataOutput out = resultStorage.getDataOutput(); - private final IPointable inputArg = new VoidPointable(); - private final IScalarEvaluator eval = args[0].createScalarEvaluator(ctx); - - private final GrowableArray array = new GrowableArray(); - private final UTF8StringBuilder builder = new UTF8StringBuilder(); - private final UTF8StringPointable string = new UTF8StringPointable(); - - @Override - public void evaluate(IFrameTupleReference tuple, IPointable result) throws AlgebricksException { - - try { - resultStorage.reset(); - eval.evaluate(tuple, inputArg); - byte[] serString = inputArg.getByteArray(); - int offset = inputArg.getStartOffset(); - int len = inputArg.getLength() - 1; - - if (serString[offset] == ATypeTag.SERIALIZED_STRING_TYPE_TAG) { - string.set(serString, offset + 1, len - 1); - array.reset(); - UTF8StringPointable.lowercase(string, builder, array); - - out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); - out.write(array.getByteArray(), 0, array.getLength()); - } else { - throw new AlgebricksException(AsterixBuiltinFunctions.STRING_LOWERCASE.getName() - + ": expects input type STRING/NULL but got " - + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(serString[offset])); - } - 
result.set(resultStorage); - } catch (IOException e1) { - throw new AlgebricksException(e1); - } - } - }; + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new LowerCaseEvaluator(ctx, args[0]); } }; } @@ -105,4 +59,22 @@ return AsterixBuiltinFunctions.STRING_LOWERCASE; } + class LowerCaseEvaluator extends AbstractUnaryStringStringEval { + + public LowerCaseEvaluator(IHyracksTaskContext context, IScalarEvaluatorFactory argEvalFactory) + throws AlgebricksException { + super(context, argEvalFactory, StringLowerCaseDescriptor.this.getIdentifier()); + } + + // Processes an input UTF8 string. + protected void process(UTF8StringPointable inputString, IPointable resultPointable) throws AlgebricksException { + try { + inputString.lowercase(resultBuilder, resultArray); + } catch (IOException e) { + throw new AlgebricksException(e); + } + } + + } + } diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRTrim2Descriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRTrim2Descriptor.java new file mode 100644 index 0000000..1c80cb7 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRTrim2Descriptor.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.util.GrowableArray; +import org.apache.hyracks.data.std.util.UTF8StringBuilder; + +public class StringRTrim2Descriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringRTrim2Descriptor(); + } + }; + + @Override + public FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_RTRIM2; + } + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) + throws AlgebricksException { + return new IScalarEvaluatorFactory() 
{ + + @Override + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new StringLTrimEvaluator(ctx, args[0], args[1]); + } + }; + } + + class StringLTrimEvaluator extends StringTrim2Descriptor.StringTrim2Evaluator { + + public StringLTrimEvaluator(IHyracksTaskContext context, IScalarEvaluatorFactory evalLeftFactory, + IScalarEvaluatorFactory evalRightFactory) throws AlgebricksException { + super(context, evalLeftFactory, evalRightFactory, StringRTrim2Descriptor.this.getIdentifier()); + } + + @Override + protected void trim(UTF8StringPointable srcPtr, UTF8StringBuilder builder, GrowableArray out) + throws IOException { + UTF8StringPointable.trim(srcPtr, builder, out, false, true, charSet); + } + } + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRTrimDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRTrimDescriptor.java new file mode 100644 index 0000000..1466a88 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRTrimDescriptor.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; + +public class StringRTrimDescriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringRTrimDescriptor(); + } + }; + + @Override + public FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_RTRIM; + } + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) + throws AlgebricksException { + return new IScalarEvaluatorFactory() { + + @Override + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new StringTrimEvaluator(ctx, args[0], StringRTrimDescriptor.this.getIdentifier()); + } + }; + } + + static class StringTrimEvaluator extends StringTrimDescriptor.StringTrimEvaluator { + + public StringTrimEvaluator(IHyracksTaskContext context, 
IScalarEvaluatorFactory evalFactory, + FunctionIdentifier funcId) throws AlgebricksException { + super(context, evalFactory, funcId); + } + + @Override + protected void process(UTF8StringPointable srcPtr, IPointable resultPointable) throws AlgebricksException { + try { + UTF8StringPointable.trim(srcPtr, resultBuilder, resultArray, false, true, charSet); + } catch (IOException e) { + throw new AlgebricksException(e); + } + } + } + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringTrim2Descriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringTrim2Descriptor.java new file mode 100644 index 0000000..3aed51f --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringTrim2Descriptor.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.commons.lang3.CharSet; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.util.ByteArrayAccessibleOutputStream; +import org.apache.hyracks.data.std.util.GrowableArray; +import org.apache.hyracks.data.std.util.UTF8StringBuilder; + +public class StringTrim2Descriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringTrim2Descriptor(); + } + }; + + @Override + public FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_TRIM2; + } + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) + throws AlgebricksException { + return new IScalarEvaluatorFactory() { + + @Override + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new StringTrim2Evaluator(ctx, args[0], args[1], StringTrim2Descriptor.this.getIdentifier()); + } + }; + } + + static class StringTrim2Evaluator extends AbstractBinaryStringStringEval { 
+ protected final ByteArrayAccessibleOutputStream lastPatternStorage = new ByteArrayAccessibleOutputStream(); + protected final UTF8StringPointable lastPatternPtr = new UTF8StringPointable(); + protected CharSet charSet; + + public StringTrim2Evaluator(IHyracksTaskContext context, IScalarEvaluatorFactory evalLeftFactory, + IScalarEvaluatorFactory evalRightFactory, FunctionIdentifier funcId) throws AlgebricksException { + super(context, evalLeftFactory, evalRightFactory, funcId); + } + + protected void compute(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, + UTF8StringPointable resultStrPtr) throws AlgebricksException { + boolean newPattern = false; + if (charSet == null || lastPatternPtr.compareTo(patternPtr) != 0) { + newPattern = true; + } + if (newPattern) { + StringEvaluatorUtils.copyResetUTF8Pointable(patternPtr, lastPatternStorage, lastPatternPtr); + charSet = CharSet.getInstance(patternPtr.toString()); + } + try { + trim(srcPtr, resultBuilder, resultArray); + } catch (IOException e) { + throw new AlgebricksException(e); + } + resultStrPtr.set(resultArray.getByteArray(), 0, resultArray.getLength()); + } + + protected void trim(UTF8StringPointable srcPtr, UTF8StringBuilder builder, GrowableArray out) + throws IOException { + UTF8StringPointable.trim(srcPtr, builder, out, true, true, charSet); + } + } + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringTrimDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringTrimDescriptor.java new file mode 100644 index 0000000..2af4611 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringTrimDescriptor.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.om.functions.AsterixBuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.commons.lang3.CharSet; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; + +public class StringTrimDescriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() { + @Override + public IFunctionDescriptor createFunctionDescriptor() { + return new StringTrimDescriptor(); + } + }; + + @Override + public 
FunctionIdentifier getIdentifier() { + return AsterixBuiltinFunctions.STRING_TRIM; + } + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) + throws AlgebricksException { + return new IScalarEvaluatorFactory() { + + @Override + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new StringTrimEvaluator(ctx, args[0], StringTrimDescriptor.this.getIdentifier()); + } + }; + } + + static class StringTrimEvaluator extends AbstractUnaryStringStringEval { + protected CharSet charSet = CharSet.getInstance(" "); + + public StringTrimEvaluator(IHyracksTaskContext context, IScalarEvaluatorFactory evalFactory, + FunctionIdentifier funcId) throws AlgebricksException { + super(context, evalFactory, funcId); + } + + @Override + protected void process(UTF8StringPointable srcPtr, IPointable resultPointable) throws AlgebricksException { + try { + UTF8StringPointable.trim(srcPtr, resultBuilder, resultArray, true, true, charSet); + } catch (IOException e) { + throw new AlgebricksException(e); + } + } + } + +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringUpperCaseDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringUpperCaseDescriptor.java index 9676b8f..9887bd2 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringUpperCaseDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringUpperCaseDescriptor.java @@ -18,14 +18,11 @@ */ package org.apache.asterix.runtime.evaluators.functions; -import java.io.DataOutput; import java.io.IOException; import org.apache.asterix.om.functions.AsterixBuiltinFunctions; import org.apache.asterix.om.functions.IFunctionDescriptor; import org.apache.asterix.om.functions.IFunctionDescriptorFactory; -import 
org.apache.asterix.om.types.ATypeTag; -import org.apache.asterix.om.types.EnumDeserializer; import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; @@ -34,11 +31,6 @@ import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.primitive.UTF8StringPointable; -import org.apache.hyracks.data.std.primitive.VoidPointable; -import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; -import org.apache.hyracks.data.std.util.GrowableArray; -import org.apache.hyracks.data.std.util.UTF8StringBuilder; -import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; public class StringUpperCaseDescriptor extends AbstractScalarFunctionDynamicDescriptor { private static final long serialVersionUID = 1L; @@ -51,51 +43,13 @@ }; @Override - public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) { + public IScalarEvaluatorFactory createEvaluatorFactory(IScalarEvaluatorFactory[] args) { return new IScalarEvaluatorFactory() { private static final long serialVersionUID = 1L; @Override - public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws AlgebricksException { - return new IScalarEvaluator() { - - private ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); - private DataOutput out = resultStorage.getDataOutput(); - private IPointable inputArg = new VoidPointable(); - private IScalarEvaluator eval = args[0].createScalarEvaluator(ctx); - - private final byte stt = ATypeTag.SERIALIZED_STRING_TYPE_TAG; - - private final GrowableArray array = new GrowableArray(); - private final UTF8StringBuilder builder = new UTF8StringBuilder(); - private final UTF8StringPointable string = new UTF8StringPointable(); - - @Override 
- public void evaluate(IFrameTupleReference tuple, IPointable result) throws AlgebricksException { - try { - resultStorage.reset(); - eval.evaluate(tuple, inputArg); - byte[] serString = inputArg.getByteArray(); - int offset = inputArg.getStartOffset(); - - if (serString[offset] == ATypeTag.SERIALIZED_STRING_TYPE_TAG) { - string.set(serString, offset + 1, serString.length); - array.reset(); - UTF8StringPointable.uppercase(string, builder, array); - - out.writeByte(stt); - out.write(array.getByteArray(), 0, array.getLength()); - } else { - throw new AlgebricksException(AsterixBuiltinFunctions.STRING_UPPERCASE.getName() - + ": expects input type STRING/NULL but got " - + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(serString[offset])); - } - result.set(resultStorage); - } catch (IOException e1) { - throw new AlgebricksException(e1); - } - } - }; + public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws AlgebricksException { + return new UpperCaseEvaluator(ctx, args[0]); } }; } @@ -105,4 +59,21 @@ return AsterixBuiltinFunctions.STRING_UPPERCASE; } + class UpperCaseEvaluator extends AbstractUnaryStringStringEval { + + public UpperCaseEvaluator(IHyracksTaskContext context, IScalarEvaluatorFactory argEvalFactory) + throws AlgebricksException { + super(context, argEvalFactory, StringUpperCaseDescriptor.this.getIdentifier()); + } + + // Processes an input UTF8 string. 
+        protected void process(UTF8StringPointable inputString, IPointable resultPointable) throws AlgebricksException {
+            try {
+                inputString.uppercase(resultBuilder, resultArray);
+            } catch (IOException e) {
+                throw new AlgebricksException(e);
+            }
+        }
+
+    }
 }
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 7e834db..1d07012 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -21,6 +21,7 @@
 import java.io.IOException;
 import java.nio.charset.Charset;
 
+import org.apache.commons.lang3.CharSet;
 import org.apache.hyracks.api.dataflow.value.ITypeTraits;
 import org.apache.hyracks.data.std.api.AbstractPointable;
 import org.apache.hyracks.data.std.api.IComparable;
@@ -443,4 +444,124 @@
         builder.finish();
     }
 
+    /**
+     * Writes the initial-capitalized form of this string; see
+     * {@link #initCap(UTF8StringPointable, UTF8StringBuilder, GrowableArray)}.
+     */
+    public void initCap(UTF8StringBuilder builder, GrowableArray out) throws IOException {
+        initCap(this, builder, out);
+    }
+
+    /**
+     * Capitalizes each word of a string: a letter that starts the string or follows a non-letter
+     * character is upper-cased, every other letter is lower-cased, and non-letter characters are
+     * copied unchanged (e.g. "this is it.i am;here." becomes "This Is It.I Am;Here.").
+     *
+     * @param src
+     *            the input string.
+     * @param builder
+     *            the builder that receives the result characters.
+     * @param out
+     *            the storage that the builder is reset onto before writing.
+     * @throws IOException
+     *             propagated from the builder.
+     */
+    public static void initCap(UTF8StringPointable src, UTF8StringBuilder builder, GrowableArray out)
+            throws IOException {
+        final int srcUtfLen = src.getUTF8Length();
+        final int srcStart = src.getMetaDataLength();
+
+        builder.reset(out, srcUtfLen);
+        // True when the next letter begins a word and therefore has to be upper-cased.
+        boolean toUpperCase = true;
+        int byteIndex = 0;
+        while (byteIndex < srcUtfLen) {
+            char originalChar = src.charAt(srcStart + byteIndex);
+            boolean isLetter = Character.isLetter(originalChar);
+
+            // Upper-cases the first letter of a word, lower-cases its other letters, and keeps
+            // non-letter characters as they are.
+            char resultChar = originalChar;
+            if (isLetter) {
+                resultChar = toUpperCase ? Character.toUpperCase(originalChar)
+                        : Character.toLowerCase(originalChar);
+            }
+            builder.appendChar(resultChar);
+            byteIndex += src.charSize(srcStart + byteIndex);
+
+            // A non-letter character ends the current word, so the next letter gets capitalized.
+            toUpperCase = !isLetter;
+        }
+        builder.finish();
+    }
+
+    /**
+     * Trims this string; see
+     * {@link #trim(UTF8StringPointable, UTF8StringBuilder, GrowableArray, boolean, boolean, CharSet)}.
+     */
+    public void trim(UTF8StringBuilder builder, GrowableArray out, boolean left, boolean right, CharSet charSet)
+            throws IOException {
+        trim(this, builder, out, left, right, charSet);
+    }
+
+    /**
+     * Strips the characters contained in {@code charSet} from the left and/or right end of a string
+     * and writes the remaining substring.
+     *
+     * @param srcPtr
+     *            the input string.
+     * @param builder
+     *            the builder that receives the result characters.
+     * @param out
+     *            the storage that the builder is reset onto before writing.
+     * @param left
+     *            whether to strip matching characters from the beginning of the string.
+     * @param right
+     *            whether to strip matching characters from the end of the string.
+     * @param charSet
+     *            the set of characters to strip.
+     * @throws IOException
+     *             propagated from the builder.
+     */
+    public static void trim(UTF8StringPointable srcPtr, UTF8StringBuilder builder, GrowableArray out, boolean left,
+            boolean right, CharSet charSet) throws IOException {
+        final int srcUtfLen = srcPtr.getUTF8Length();
+        final int srcStart = srcPtr.getMetaDataLength();
+        // Finds the start index (inclusive).
+        int startIndex = 0;
+        if (left) {
+            while (startIndex < srcUtfLen) {
+                char ch = srcPtr.charAt(srcStart + startIndex);
+                if (!charSet.contains(ch)) {
+                    break;
+                }
+                startIndex += srcPtr.charSize(srcStart + startIndex);
+            }
+        }
+
+        // Finds the end index (exclusive).
+        int endIndex = srcUtfLen;
+        if (right) {
+            // Scans forward and remembers the position right after the last character to keep.
+            endIndex = startIndex;
+            int cursorIndex = startIndex;
+            while (cursorIndex < srcUtfLen) {
+                char ch = srcPtr.charAt(srcStart + cursorIndex);
+                cursorIndex += srcPtr.charSize(srcStart + cursorIndex);
+                if (!charSet.contains(ch)) {
+                    endIndex = cursorIndex;
+                }
+            }
+        }
+
+        // Outputs the desired substring.
+        builder.reset(out, endIndex - startIndex);
+        while (startIndex < endIndex) {
+            char ch = srcPtr.charAt(srcStart + startIndex);
+            startIndex += srcPtr.charSize(srcStart + startIndex);
+            builder.appendChar(ch);
+        }
+        builder.finish();
+    }
+
 }
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
index c47cc36..93b2290 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@ -19,10 +19,12 @@
 
 package org.apache.hyracks.data.std.primitive;
 
+import static org.apache.hyracks.data.std.primitive.UTF8StringPointable.generateUTF8Pointable;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import org.apache.commons.lang3.CharSet;
 import org.apache.hyracks.data.std.util.GrowableArray;
 import org.apache.hyracks.data.std.util.UTF8StringBuilder;
 import org.apache.hyracks.util.string.UTF8StringSample;
@@ -30,21 +32,17 @@
 import org.junit.Test;
 
 public class UTF8StringPointableTest {
-    public static UTF8StringPointable STRING_EMPTY = UTF8StringPointable
-            .generateUTF8Pointable(UTF8StringSample.EMPTY_STRING);
-    public static UTF8StringPointable STRING_UTF8_MIX = UTF8StringPointable
-            .generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX);
-    public static UTF8StringPointable STRING_UTF8_MIX_LOWERCASE = UTF8StringPointable.generateUTF8Pointable(
+    public static UTF8StringPointable STRING_EMPTY = generateUTF8Pointable(UTF8StringSample.EMPTY_STRING);
+    public static UTF8StringPointable STRING_UTF8_MIX = 
generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX); + public static UTF8StringPointable STRING_UTF8_MIX_LOWERCASE = generateUTF8Pointable( UTF8StringSample.STRING_UTF8_MIX_LOWERCASE); - public static UTF8StringPointable STRING_LEN_127 = UTF8StringPointable - .generateUTF8Pointable(UTF8StringSample.STRING_LEN_127); - public static UTF8StringPointable STRING_LEN_128 = UTF8StringPointable - .generateUTF8Pointable(UTF8StringSample.STRING_LEN_128); + public static UTF8StringPointable STRING_LEN_127 = generateUTF8Pointable(UTF8StringSample.STRING_LEN_127); + public static UTF8StringPointable STRING_LEN_128 = generateUTF8Pointable(UTF8StringSample.STRING_LEN_128); @Test public void testGetStringLength() throws Exception { - UTF8StringPointable utf8Ptr = UTF8StringPointable.generateUTF8Pointable(UTF8StringSample.STRING_LEN_127); + UTF8StringPointable utf8Ptr = generateUTF8Pointable(UTF8StringSample.STRING_LEN_127); assertEquals(127, utf8Ptr.getUTF8Length()); assertEquals(1, utf8Ptr.getMetaDataLength()); assertEquals(127, utf8Ptr.getStringLength()); @@ -84,7 +82,7 @@ @Test public void testConcat() throws Exception { - UTF8StringPointable expected = UTF8StringPointable.generateUTF8Pointable( + UTF8StringPointable expected = generateUTF8Pointable( UTF8StringSample.generateStringRepeatBy(UTF8StringSample.ONE_ASCII_CHAR, 127 + 128)); GrowableArray storage = new GrowableArray(); @@ -132,9 +130,9 @@ assertEquals(0, STRING_EMPTY.compareTo(result)); storage.reset(); - UTF8StringPointable testPtr = UTF8StringPointable.generateUTF8Pointable("Mix中文123"); - UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("文"); - UTF8StringPointable expect = UTF8StringPointable.generateUTF8Pointable("Mix中"); + UTF8StringPointable testPtr = generateUTF8Pointable("Mix中文123"); + UTF8StringPointable pattern = generateUTF8Pointable("文"); + UTF8StringPointable expect = generateUTF8Pointable("Mix中"); testPtr.substrBefore(pattern, builder, storage); 
result.set(storage.getByteArray(), 0, storage.getLength()); assertEquals(0, expect.compareTo(result)); @@ -149,14 +147,13 @@ UTF8StringPointable result = new UTF8StringPointable(); result.set(storage.getByteArray(), 0, storage.getLength()); - UTF8StringPointable expect = UTF8StringPointable - .generateUTF8Pointable(Character.toString(UTF8StringSample.ONE_ASCII_CHAR)); + UTF8StringPointable expect = generateUTF8Pointable(Character.toString(UTF8StringSample.ONE_ASCII_CHAR)); assertEquals(0, expect.compareTo(result)); storage.reset(); - UTF8StringPointable testPtr = UTF8StringPointable.generateUTF8Pointable("Mix中文123"); - UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("文"); - expect = UTF8StringPointable.generateUTF8Pointable("123"); + UTF8StringPointable testPtr = generateUTF8Pointable("Mix中文123"); + UTF8StringPointable pattern = generateUTF8Pointable("文"); + expect = generateUTF8Pointable("123"); testPtr.substrAfter(pattern, builder, storage); result.set(storage.getByteArray(), 0, storage.getLength()); assertEquals(0, expect.compareTo(result)); @@ -185,9 +182,79 @@ result.set(storage.getByteArray(), 0, storage.getLength()); - UTF8StringPointable expected = UTF8StringPointable - .generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX_LOWERCASE.toUpperCase()); + UTF8StringPointable expected = generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX_LOWERCASE.toUpperCase()); + assertEquals(0, expected.compareTo(result)); + } + + @Test + public void testInitCap() throws Exception { + UTF8StringBuilder builder = new UTF8StringBuilder(); + GrowableArray storage = new GrowableArray(); + + UTF8StringPointable result = new UTF8StringPointable(); + UTF8StringPointable input = generateUTF8Pointable("this is it.i am;here."); + input.initCap(builder, storage); + + result.set(storage.getByteArray(), 0, storage.getLength()); + + UTF8StringPointable expected = generateUTF8Pointable("This Is It.I Am;Here."); + assertEquals(0, expected.compareTo(result)); + } + 
+ @Test + public void testTrim() throws Exception { + UTF8StringBuilder builder = new UTF8StringBuilder(); + GrowableArray storage = new GrowableArray(); + UTF8StringPointable result = new UTF8StringPointable(); + UTF8StringPointable input = generateUTF8Pointable(" this is it.i am;here. "); + + // Trims both sides. + input.trim(builder, storage, true, true, CharSet.getInstance(" ")); + result.set(storage.getByteArray(), 0, storage.getLength()); + UTF8StringPointable expected = generateUTF8Pointable("this is it.i am;here."); assertEquals(0, expected.compareTo(result)); + // Only trims the right side. + storage.reset(); + input.trim(builder, storage, false, true, CharSet.getInstance(" ")); + result.set(storage.getByteArray(), 0, storage.getLength()); + expected = generateUTF8Pointable(" this is it.i am;here."); + assertEquals(0, expected.compareTo(result)); + + // Only trims the left side. + storage.reset(); + input.trim(builder, storage, true, false, CharSet.getInstance(" ")); + result.set(storage.getByteArray(), 0, storage.getLength()); + expected = generateUTF8Pointable("this is it.i am;here. "); + assertEquals(0, expected.compareTo(result)); } + + @Test + public void testTrimWithPattern() throws Exception { + UTF8StringBuilder builder = new UTF8StringBuilder(); + GrowableArray storage = new GrowableArray(); + UTF8StringPointable result = new UTF8StringPointable(); + UTF8StringPointable input = generateUTF8Pointable(" this is it.i am;here. "); + + // Trims both sides. + input.trim(builder, storage, true, true, CharSet.getInstance(" hert.")); + result.set(storage.getByteArray(), 0, storage.getLength()); + UTF8StringPointable expected = generateUTF8Pointable("is is it.i am;"); + assertEquals(0, expected.compareTo(result)); + + // Only trims the right side. 
+ storage.reset(); + input.trim(builder, storage, false, true, CharSet.getInstance(" hert.")); + result.set(storage.getByteArray(), 0, storage.getLength()); + expected = generateUTF8Pointable(" this is it.i am;"); + assertEquals(0, expected.compareTo(result)); + + // Only trims the left side. + storage.reset(); + input.trim(builder, storage, true, false, CharSet.getInstance(" hert.")); + result.set(storage.getByteArray(), 0, storage.getLength()); + expected = generateUTF8Pointable("is is it.i am;here. "); + assertEquals(0, expected.compareTo(result)); + } + } -- To view, visit https://asterix-gerrit.ics.uci.edu/1104 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I26351af22f67d66b56176f55b29a4e7ff63583f7 Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Yingyi Bu <buyin...@gmail.com>