This is an automated email from the ASF dual-hosted git repository. estrauss pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new d80e3a6d04 [SYSTEMDS-3758] Python API Builtin triu, tril, argmin, argmax and casting Scalar <-> Matrix <-> Frame d80e3a6d04 is described below commit d80e3a6d0493eed0043a494dbaeb875d5441a191 Author: e-strauss <lathan...@gmx.de> AuthorDate: Thu Sep 26 17:16:41 2024 +0200 [SYSTEMDS-3758] Python API Builtin triu, tril, argmin, argmax and casting Scalar <-> Matrix <-> Frame Closes #2113 --- src/main/python/systemds/operator/nodes/frame.py | 8 +- src/main/python/systemds/operator/nodes/matrix.py | 91 ++++++++++++++++++ src/main/python/systemds/operator/nodes/scalar.py | 24 ++++- .../python/systemds/operator/operation_node.py | 9 ++ src/main/python/systemds/utils/converters.py | 2 +- src/main/python/tests/matrix/test_arg_min_max.py | 95 +++++++++++++++++++ src/main/python/tests/matrix/test_casting.py | 76 +++++++++++++++ src/main/python/tests/matrix/test_triangular.py | 104 +++++++++++++++++++++ 8 files changed, 406 insertions(+), 3 deletions(-) diff --git a/src/main/python/systemds/operator/nodes/frame.py b/src/main/python/systemds/operator/nodes/frame.py index 2d9e231954..9c76fc5aca 100644 --- a/src/main/python/systemds/operator/nodes/frame.py +++ b/src/main/python/systemds/operator/nodes/frame.py @@ -44,8 +44,14 @@ if TYPE_CHECKING: from systemds.context import SystemDSContext -class Frame(OperationNode): +def to_frame(self): + return Frame(self.sds_context, "as.frame", [self]) + + +OperationNode.to_frame = to_frame + +class Frame(OperationNode): _pd_dataframe: pd.DataFrame def __init__( diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py index cddfad6d46..208e248ec6 100644 --- a/src/main/python/systemds/operator/nodes/matrix.py +++ b/src/main/python/systemds/operator/nodes/matrix.py @@ -41,6 +41,13 @@ from systemds.utils.helpers import ( ) +def to_matrix(self): + return Matrix(self.sds_context, "as.matrix", [self]) + + 
+OperationNode.to_matrix = to_matrix + + class Matrix(OperationNode): _np_array: np.array @@ -842,5 +849,89 @@ class Matrix(OperationNode): return ifft_node + def triu(self, include_diagonal=True, return_values=True) -> "Matrix": + """Selects the upper triangular part of a matrix, configurable to include the diagonal and return values or ones + + :param include_diagonal: boolean, default True + :param return_values: boolean, default True, if set to False returns ones + :return: `Matrix` + """ + named_input_nodes = { + "target": self, + "diag": self.sds_context.scalar(include_diagonal), + "values": self.sds_context.scalar(return_values), + } + return Matrix( + self.sds_context, "upper.tri", named_input_nodes=named_input_nodes + ) + + def tril(self, include_diagonal=True, return_values=True) -> "Matrix": + """Selects the lower triangular part of a matrix, configurable to include the diagonal and return values or ones + + :param include_diagonal: boolean, default True + :param return_values: boolean, default True, if set to False returns ones + :return: `Matrix` + """ + named_input_nodes = { + "target": self, + "diag": self.sds_context.scalar(include_diagonal), + "values": self.sds_context.scalar(return_values), + } + return Matrix( + self.sds_context, "lower.tri", named_input_nodes=named_input_nodes + ) + + def argmin(self, axis: int = None) -> "OperationNode": + """Return the index of the minimum if axis is None or a column vector for row-wise / column-wise minima + computation. 
+ + :param axis: can be 0 or 1 to compute the index of the minimum per column or per row, respectively + :return: `Matrix` representing operation for row / columns or 'Scalar' representing operation for complete + """ + if axis == 0: + return Matrix(self.sds_context, "rowIndexMin", [self.t()]) + elif axis == 1: + return Matrix(self.sds_context, "rowIndexMin", [self]) + elif axis is None: + return Matrix( + self.sds_context, + "rowIndexMin", + [self.reshape(1, self.nCol() * self.nRow())], + ).to_scalar() + else: + raise ValueError( + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) + + def argmax(self, axis: int = None) -> "OperationNode": + """Return the index of the maximum if axis is None or a column vector for row-wise / column-wise maxima + computation. + + :param axis: can be 0 or 1 to compute the index of the maximum per column or per row, respectively + :return: `Matrix` representing operation for row / columns or 'Scalar' representing operation for complete + """ + if axis == 0: + return Matrix(self.sds_context, "rowIndexMax", [self.t()]) + elif axis == 1: + return Matrix(self.sds_context, "rowIndexMax", [self]) + elif axis is None: + return Matrix( + self.sds_context, + "rowIndexMax", + [self.reshape(1, self.nCol() * self.nRow())], + ).to_scalar() + else: + raise ValueError( + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) + + def reshape(self, rows, cols=1): + """Gives a new shape to a matrix without changing its data. 
+ + :param rows: number of rows + :param cols: number of columns, defaults to 1 + :return: `Matrix` representing operation""" + return Matrix(self.sds_context, "matrix", [self, rows, cols]) + def __str__(self): return "MatrixNode" diff --git a/src/main/python/systemds/operator/nodes/scalar.py b/src/main/python/systemds/operator/nodes/scalar.py index 1d87ce5637..9224bba67b 100644 --- a/src/main/python/systemds/operator/nodes/scalar.py +++ b/src/main/python/systemds/operator/nodes/scalar.py @@ -32,7 +32,13 @@ from systemds.utils.consts import ( VALID_ARITHMETIC_TYPES, VALID_INPUT_TYPES, ) -from systemds.utils.converters import numpy_to_matrix_block + + +def to_scalar(self): + return Scalar(self.sds_context, "as.scalar", [self]) + + +OperationNode.to_scalar = to_scalar class Scalar(OperationNode): @@ -67,6 +73,8 @@ class Scalar(OperationNode): named_input_vars: Dict[str, str], ) -> str: if self.__assign: + if type(self.operation) is bool: + self.operation = "TRUE" if self.operation else "FALSE" return f"{var_name}={self.operation};" else: return super().code_line(var_name, unnamed_input_vars, named_input_vars) @@ -289,6 +297,20 @@ class Scalar(OperationNode): """ return Scalar(self.sds_context, "toString", [self], named_input_nodes=kwargs) + def to_int(self) -> "Scalar": + return Scalar( + self.sds_context, + "as.integer", + [self], + ) + + def to_boolean(self) -> "Scalar": + return Scalar( + self.sds_context, + "as.logical", + [self], + ) + def isNA(self) -> "Scalar": """Computes a boolean indicator matrix of the same shape as the input, indicating where NA (not available) values are located. Currently, NA is only capturing NaN values. 
diff --git a/src/main/python/systemds/operator/operation_node.py b/src/main/python/systemds/operator/operation_node.py index 41c40df900..c93141fb32 100644 --- a/src/main/python/systemds/operator/operation_node.py +++ b/src/main/python/systemds/operator/operation_node.py @@ -202,3 +202,12 @@ class OperationNode(DAGNode): To get the returned string look at the stdout of SystemDSContext. """ return OperationNode(self.sds_context, "print", [self], kwargs) + + def to_frame(self): + raise NotImplementedError("should have been overwritten in frame.py") + + def to_matrix(self): + raise NotImplementedError("should have been overwritten in matrix.py") + + def to_scalar(self): + raise NotImplementedError("should have been overwritten in scalar.py") diff --git a/src/main/python/systemds/utils/converters.py b/src/main/python/systemds/utils/converters.py index 5ce3fbde57..8551b8ce6a 100644 --- a/src/main/python/systemds/utils/converters.py +++ b/src/main/python/systemds/utils/converters.py @@ -104,7 +104,7 @@ def pandas_to_frame_block(sds, pd_df: pd.DataFrame): np.dtype(np.int32): jvm.org.apache.sysds.common.Types.ValueType.INT32, np.dtype(np.float32): jvm.org.apache.sysds.common.Types.ValueType.FP32, np.dtype(np.uint8): jvm.org.apache.sysds.common.Types.ValueType.UINT8, - np.dtype(np.character): jvm.org.apache.sysds.common.Types.ValueType.CHARACTER, + np.dtype(np.str_): jvm.org.apache.sysds.common.Types.ValueType.CHARACTER, } schema = [] col_names = [] diff --git a/src/main/python/tests/matrix/test_arg_min_max.py b/src/main/python/tests/matrix/test_arg_min_max.py new file mode 100644 index 0000000000..602a9dfee2 --- /dev/null +++ b/src/main/python/tests/matrix/test_arg_min_max.py @@ -0,0 +1,95 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +import unittest +import numpy as np +from systemds.context import SystemDSContext + +np.random.seed(7) +m = np.array([[1, 2, 3], [6, 5, 4], [8, 7, 9]]) +M = np.random.random_integers(9, size=300).reshape(100, 3) +p = np.array([0.25, 0.5, 0.75]) +m2 = np.array([1, 2, 3, 4, 5]) +w2 = np.array([1, 1, 1, 1, 5]) + + +def weighted_quantiles(values, weights, quantiles=0.5): + i = np.argsort(values) + c = np.cumsum(weights[i]) + return values[i[np.searchsorted(c, np.array(quantiles) * c[-1])]] + + +class TestARGMINMAX(unittest.TestCase): + def setUp(self): + self.sds = SystemDSContext() + + def tearDown(self): + self.sds.close() + + def test_argmin_basic1(self): + sds_input = self.sds.from_numpy(m) + sds_result = sds_input.argmin(0).compute() + np_result = np.argmin(m, axis=0).reshape(-1, 1) + assert np.allclose(sds_result - 1, np_result, 1e-9) + + def test_argmin_basic2(self): + sds_input = self.sds.from_numpy(m) + sds_result = sds_input.argmin(1).compute() + np_result = np.argmin(m, axis=1).reshape(-1, 1) + assert np.allclose(sds_result - 1, np_result, 1e-9) + + def test_argmin_basic3(self): + sds_input = self.sds.from_numpy(m) + sds_result = sds_input.argmin().compute(verbose=True) + np_result = np.argmin(m) + assert np.allclose(sds_result - 1, np_result, 1e-9) + + def test_argmin_basic4(self): + sds_input = self.sds.from_numpy(m) + 
with self.assertRaises(ValueError): + sds_input.argmin(3) + + def test_argmax_basic1(self): + sds_input = self.sds.from_numpy(m) + sds_result = sds_input.argmax(0).compute() + np_result = np.argmax(m, axis=0).reshape(-1, 1) + assert np.allclose(sds_result - 1, np_result, 1e-9) + + def test_argmax_basic2(self): + sds_input = self.sds.from_numpy(m) + sds_result = sds_input.argmax(1).compute() + np_result = np.argmax(m, axis=1).reshape(-1, 1) + assert np.allclose(sds_result - 1, np_result, 1e-9) + + def test_argmax_basic3(self): + sds_input = self.sds.from_numpy(m) + sds_result = sds_input.argmax().compute() + np_result = np.argmax(m) + assert np.allclose(sds_result - 1, np_result, 1e-9) + + def test_argmax_basic4(self): + sds_input = self.sds.from_numpy(m) + with self.assertRaises(ValueError): + sds_input.argmax(3) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/main/python/tests/matrix/test_casting.py b/src/main/python/tests/matrix/test_casting.py new file mode 100644 index 0000000000..f990ec09d3 --- /dev/null +++ b/src/main/python/tests/matrix/test_casting.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- + +import unittest +import numpy as np +from systemds.context import SystemDSContext +from pandas import DataFrame +from numpy import ndarray + + +class TestDIAG(unittest.TestCase): + def setUp(self): + self.sds = SystemDSContext() + + def tearDown(self): + self.sds.close() + + def test_casting_basic1(self): + sds_input = self.sds.from_numpy(np.array([[1]])) + sds_result = sds_input.to_scalar().compute() + self.assertTrue(type(sds_result) == float) + + def test_casting_basic2(self): + sds_input = self.sds.from_numpy(np.array([[1]])) + sds_result = sds_input.to_frame().compute() + self.assertTrue(type(sds_result) == DataFrame) + + def test_casting_basic3(self): + sds_result = self.sds.scalar(1.0).to_frame().compute() + self.assertTrue(type(sds_result) == DataFrame) + + def test_casting_basic4(self): + sds_result = self.sds.scalar(1.0).to_matrix().compute() + self.assertTrue(type(sds_result) == ndarray) + + def test_casting_basic5(self): + ar = ndarray((2, 2)) + df = DataFrame(ar) + sds_result = self.sds.from_pandas(df).to_matrix().compute() + self.assertTrue(type(sds_result) == ndarray and np.allclose(ar, sds_result)) + + def test_casting_basic6(self): + ar = ndarray((1, 1)) + df = DataFrame(ar) + sds_result = self.sds.from_pandas(df).to_scalar().compute() + self.assertTrue(type(sds_result) == float) + + def test_casting_basic7(self): + sds_result = self.sds.scalar(1.0).to_int().compute() + self.assertTrue(type(sds_result) == int and sds_result) + + def test_casting_basic8(self): + sds_result = self.sds.scalar(1.0).to_boolean().compute() + self.assertTrue(type(sds_result) == bool) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/main/python/tests/matrix/test_triangular.py b/src/main/python/tests/matrix/test_triangular.py new file mode 100644 index 0000000000..f7ea2d840b --- /dev/null +++ b/src/main/python/tests/matrix/test_triangular.py @@ -0,0 +1,104 @@ +# 
------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +import unittest +import numpy as np +from systemds.context import SystemDSContext + +m1 = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]) + +m2 = np.random.random((10, 10)) + + +class TestTRIANGULAR(unittest.TestCase): + def setUp(self): + self.sds = SystemDSContext() + + def tearDown(self): + self.sds.close() + + def test_triu_basic1(self): + sds_input = self.sds.from_numpy(m1) + sds_result = sds_input.triu().compute() + np_result = np.triu(m1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_triu_basic2(self): + sds_input = self.sds.from_numpy(m1) + sds_result = sds_input.triu(include_diagonal=False).compute() + np_result = np.triu(m1, 1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_triu_basic3(self): + sds_input = self.sds.from_numpy(m1) + sds_result = sds_input.triu(return_values=False).compute() + np_result = np.triu(m1) > 0 + assert np.allclose(sds_result, np_result, 1e-9) + + def test_triu_basic4(self): + sds_input = self.sds.from_numpy(m1) + sds_result = sds_input.triu( + 
return_values=False, include_diagonal=False + ).compute() + np_result = np.triu(m1, 1) > 0 + assert np.allclose(sds_result, np_result, 1e-9) + + def test_triu_random(self): + sds_input = self.sds.from_numpy(m2) + sds_result = sds_input.triu().compute() + np_result = np.triu(m2) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_tril_basic1(self): + sds_input = self.sds.from_numpy(m1) + sds_result = sds_input.tril().compute() + np_result = np.tril(m1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_tril_basic2(self): + sds_input = self.sds.from_numpy(m1) + sds_result = sds_input.tril(include_diagonal=False).compute() + np_result = np.tril(m1, -1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_tril_basic3(self): + sds_input = self.sds.from_numpy(m1) + sds_result = sds_input.tril(return_values=False).compute() + np_result = np.tril(m1) > 0 + assert np.allclose(sds_result, np_result, 1e-9) + + def test_tril_basic4(self): + sds_input = self.sds.from_numpy(m1) + sds_result = sds_input.tril( + return_values=False, include_diagonal=False + ).compute() + np_result = np.tril(m1, -1) > 0 + assert np.allclose(sds_result, np_result, 1e-9) + + def test_tril_random(self): + sds_input = self.sds.from_numpy(m2) + sds_result = sds_input.tril().compute() + np_result = np.tril(m2) + assert np.allclose(sds_result, np_result, 1e-9) + + +if __name__ == "__main__": + unittest.main()