This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new 2d3c1e6115 [SYSTEMDS-3742] Python API Builtin unique 2d3c1e6115 is described below commit 2d3c1e61156a29e58e299f9832202b7ad89a4a0f Author: e-strauss <92718421+e-stra...@users.noreply.github.com> AuthorDate: Tue Sep 3 22:08:16 2024 +0200 [SYSTEMDS-3742] Python API Builtin unique Closes #2089 --- src/main/python/systemds/operator/nodes/matrix.py | 17 ++++ src/main/python/tests/matrix/test_unique.py | 118 ++++++++++++++++++++++ 2 files changed, 135 insertions(+) diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py index 07566ebfd0..4e010bf088 100644 --- a/src/main/python/systemds/operator/nodes/matrix.py +++ b/src/main/python/systemds/operator/nodes/matrix.py @@ -296,6 +296,23 @@ class Matrix(OperationNode): """ return Scalar(self.sds_context, 'trace', [self]) + def unique(self, axis: int = None) -> 'Matrix': + """Returns the unique values for the complete matrix, for each row or for each column. + + :param axis: can be 0 or 1 to do either row or column uniques + :return: `Matrix` representing operation + """ + if axis == 0: + named_input_nodes = {"dir": '"c"'} + return Matrix(self.sds_context, 'unique', [self], named_input_nodes=named_input_nodes) + elif axis == 1: + named_input_nodes = {"dir": '"r"'} + return Matrix(self.sds_context, 'unique', [self], named_input_nodes=named_input_nodes) + elif axis is None: + return Matrix(self.sds_context, 'unique', [self]) + raise ValueError( + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + def abs(self) -> 'Matrix': """Calculate absolute. diff --git a/src/main/python/tests/matrix/test_unique.py b/src/main/python/tests/matrix/test_unique.py new file mode 100644 index 0000000000..b84c3ae2f5 --- /dev/null +++ b/src/main/python/tests/matrix/test_unique.py @@ -0,0 +1,118 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +import unittest +import numpy as np +from systemds.context import SystemDSContext + + +np.random.seed(7) + + +# np's unique applied on an axis checks for unique vectors along that axis -> on the other hand systemds' unique +# returns the unique values along that axis for each vector on that axis +def compute_expected(m, num_cols, axis): + def padded(row): + unique = np.unique(row) + row = np.pad(unique, (num_cols - len(unique), 0), "constant", constant_values=0) + return row + + if axis == 1: + return np.array([padded(r) for r in m]) + else: + return np.array([padded(r) for r in m.T]).T + + +class TestUNIQUE(unittest.TestCase): + def setUp(self): + self.sds = SystemDSContext() + + def tearDown(self): + self.sds.close() + + def test_unique_basic(self): + input_matrix = np.array( + [[1, -2, 3, 4], [0, -6, 7, 8], [0, -10, 11, -12], [0, -14, 15, -16]] + ) + + sds_input = self.sds.from_numpy(input_matrix) + sds_result = sds_input.unique().compute() + sds_result = np.sort(np.reshape(sds_result, (-1))) + np_result = np.unique(input_matrix) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_unique_basic2(self): + input_matrix = np.array( + [[1, 1, 1, 1], [2, 2, 2, 2], [0, 10, 11, 12], [0, 14, 15, 16]] + ) + + sds_input = self.sds.from_numpy(input_matrix) + sds_result = sds_input.unique(1).compute() + sds_result = np.sort(sds_result, 1) + num_cols = sds_result.shape[1] + np_result = compute_expected(input_matrix, num_cols, 1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_unique_basic3(self): + input_matrix = np.array( + [[0, 1, 1, 1], [0, 1, 1, 1], [0, 10, 11, 12], [0, 14, 15, 16]] + ) + + sds_input = self.sds.from_numpy(input_matrix) + sds_result = sds_input.unique(0).compute() + sds_result = np.sort(sds_result, 0) + num_rows = sds_result.shape[0] + np_result = compute_expected(input_matrix, num_rows, 0) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_unique_random1(self): + input_matrix = np.random.random((10, 10)) * 200 + sds_input = self.sds.from_numpy(input_matrix) + sds_result = sds_input.unique().compute() + sds_result = np.sort(np.reshape(sds_result, (-1))) + np_result = np.unique(input_matrix) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_unique_random2(self): + input_matrix = np.random.random((10, 10)) * 200 + sds_input = self.sds.from_numpy(input_matrix) + sds_result = sds_input.unique(1).compute() + sds_result = np.sort(sds_result, 1) + num_cols = sds_result.shape[1] + np_result = compute_expected(input_matrix, num_cols, 1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_unique_random3(self): + input_matrix = np.random.random((10, 10)) * 200 + sds_input = self.sds.from_numpy(input_matrix) + sds_result = sds_input.unique(0).compute() + sds_result = np.sort(sds_result, 0) + num_rows = sds_result.shape[0] + np_result = compute_expected(input_matrix, num_rows, 0) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_unique_error(self): + with self.assertRaises(ValueError): + self.sds.from_numpy(np.array([[1, 2]])).unique(2) + + +if __name__ == "__main__": + unittest.main()