Repository: systemml Updated Branches: refs/heads/master 6ca9be1f5 -> 62647de61
[MINOR] Added an external UDF to split string - Also, updated ListObject to specify the valuetype of the list. This takes care of the "wrong value type warning". Closes #844. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/62647de6 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/62647de6 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/62647de6 Branch: refs/heads/master Commit: 62647de614a549cba7e89446b89308bb531e61a1 Parents: 6ca9be1 Author: Niketan Pansare <npan...@us.ibm.com> Authored: Thu Nov 29 16:01:09 2018 -0800 Committer: Niketan Pansare <npan...@us.ibm.com> Committed: Thu Nov 29 16:01:09 2018 -0800 ---------------------------------------------------------------------- .../runtime/instructions/cp/ListObject.java | 14 +++- .../org/apache/sysml/udf/lib/SplitWrapper.java | 85 ++++++++++++++++++++ 2 files changed, 96 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/62647de6/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java index 863b77c..576e57c 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java @@ -37,11 +37,19 @@ public class ListObject extends Data { private int _nCacheable; public ListObject(List<Data> data) { - this(data, null); + this(data, null, ValueType.UNKNOWN); } - + + public ListObject(List<Data> data, ValueType vt) { + this(data, null, vt); + } + public ListObject(List<Data> data, List<String> names) { - super(DataType.LIST, ValueType.UNKNOWN); + this(data, names, ValueType.UNKNOWN); + } + + 
public ListObject(List<Data> data, List<String> names, ValueType vt) { + super(DataType.LIST, vt); _data = data; _names = names; _nCacheable = (int) data.stream().filter( http://git-wip-us.apache.org/repos/asf/systemml/blob/62647de6/src/main/java/org/apache/sysml/udf/lib/SplitWrapper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/udf/lib/SplitWrapper.java b/src/main/java/org/apache/sysml/udf/lib/SplitWrapper.java new file mode 100644 index 0000000..75cb27a --- /dev/null +++ b/src/main/java/org/apache/sysml/udf/lib/SplitWrapper.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.udf.lib; + +import java.util.ArrayList; + +import org.apache.sysml.parser.Expression.ValueType; +import org.apache.sysml.runtime.instructions.cp.Data; +import org.apache.sysml.runtime.instructions.cp.ListObject; +import org.apache.sysml.runtime.instructions.cp.StringObject; +import org.apache.sysml.udf.FunctionParameter; +import org.apache.sysml.udf.PackageFunction; +import org.apache.sysml.udf.Scalar; +import org.apache.sysml.udf.List; + +/** + * Wrapper class for split invocation: an external function that splits a string around matches of a + * regular expression using {@link String#split(String)} or {@link String#split(String, int)}. + * + * Inputs: (0) the string to split, (1) the regular expression and, optionally, (2) the limit, whose + * scalar value is parsed with {@link Integer#parseInt(String)}. Any other number of inputs makes + * {@link #execute()} throw a RuntimeException. + * + * Output: a single list that holds one string object per part and is created with value type STRING. + * + * Example DML usage: + * + * split = externalFunction(String s, String regex, int limit) return (list[String] out) implemented in + * (classname="org.apache.sysml.udf.lib.SplitWrapper",exectype="mem"); + * + * out = split ("foo_goo_boo", "_", 2); + * # a limit of 2 yields at most two parts: "foo" and "goo_boo" + * for ( i in 1:2) { print(as.scalar(out[i])); } + * + */ +public class SplitWrapper extends PackageFunction { + private static final long serialVersionUID = 1L; + + private List outputList; +/** + * Reports the number of outputs of this function. + * + * @return always 1, the list of parts + */ + @Override + public int getNumFunctionOutputs() { + return 1; + } +/** + * Returns the output stored by the most recent call to execute(). + * + * @param pos zero-based output position; only 0 is valid + * @return the list of parts, or null if execute() has not assigned it yet + * @throws RuntimeException if pos is not 0 + */ + @Override + public FunctionParameter getFunctionOutput(int pos) { + if (pos == 0) + return outputList; + else + throw new RuntimeException("Invalid function output being requested"); + } +/** + * Splits input 0 around matches of the regular expression in input 1 and stores the parts, + * wrapped as string objects, in the output list. When a third input is present it is parsed + * as an int and passed to String.split as the limit. + * + * @throws RuntimeException if the number of inputs is neither 2 nor 3 + */ + @Override + public void execute() { + String str = ((Scalar) getFunctionInput(0)).getValue(); + String regex = ((Scalar) getFunctionInput(1)).getValue(); +/* The limit is optional, so the number of inputs selects the String.split overload. */ + int numInputs = getNumFunctionInputs(); + String [] parts = null; + if(numInputs == 2) { + parts = str.split(regex); + } + else if(numInputs == 3) { + parts = str.split(regex, Integer.parseInt(((Scalar) getFunctionInput(2)).getValue())); + } + else { + throw new RuntimeException("Incorrect number of inputs. Expected 2 or 3 inputs."); + } +/* java.util.List is fully qualified because the simple name List is imported from org.apache.sysml.udf. */ + java.util.List<Data> outputData = new ArrayList<>(); + for(String part : parts) { + outputData.add(new StringObject(part)); + } + outputList = new List(new ListObject(outputData, ValueType.STRING)); + } + +}