This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 2d3c1e6115 [SYSTEMDS-3742] Python API Builtin unique
2d3c1e6115 is described below

commit 2d3c1e61156a29e58e299f9832202b7ad89a4a0f
Author: e-strauss <92718421+e-stra...@users.noreply.github.com>
AuthorDate: Tue Sep 3 22:08:16 2024 +0200

    [SYSTEMDS-3742] Python API Builtin unique
    
    Closes #2089
---
 src/main/python/systemds/operator/nodes/matrix.py |  17 ++++
 src/main/python/tests/matrix/test_unique.py       | 118 ++++++++++++++++++++++
 2 files changed, 135 insertions(+)

diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py
index 07566ebfd0..4e010bf088 100644
--- a/src/main/python/systemds/operator/nodes/matrix.py
+++ b/src/main/python/systemds/operator/nodes/matrix.py
@@ -296,6 +296,23 @@ class Matrix(OperationNode):
         """
         return Scalar(self.sds_context, 'trace', [self])
 
+    def unique(self, axis: int = None) -> 'Matrix':
+        """Returns the unique values for the complete matrix, for each row or for each column.
+
+        :param axis: can be 0 or 1 to do either row or column uniques
+        :return: `Matrix` representing operation
+        """
+        if axis == 0:
+            named_input_nodes = {"dir": '"c"'}
+            return Matrix(self.sds_context, 'unique', [self], named_input_nodes=named_input_nodes)
+        elif axis == 1:
+            named_input_nodes = {"dir": '"r"'}
+            return Matrix(self.sds_context, 'unique', [self], named_input_nodes=named_input_nodes)
+        elif axis is None:
+            return Matrix(self.sds_context, 'unique', [self])
+        raise ValueError(
+            f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}")
+
     def abs(self) -> 'Matrix':
         """Calculate absolute.
 
diff --git a/src/main/python/tests/matrix/test_unique.py b/src/main/python/tests/matrix/test_unique.py
new file mode 100644
index 0000000000..b84c3ae2f5
--- /dev/null
+++ b/src/main/python/tests/matrix/test_unique.py
@@ -0,0 +1,118 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+
+
+np.random.seed(7)
+
+
+# np's unique applied on an axis checks for unique vectors along that axis -> on the other hand systemds' unique
+# returns the unique values along that axis for each vector on that axis
+def compute_expected(m, num_cols, axis):
+    def padded(row):
+        unique = np.unique(row)
+        row = np.pad(unique, (num_cols - len(unique), 0), "constant", constant_values=0)
+        return row
+
+    if axis == 1:
+        return np.array([padded(r) for r in m])
+    else:
+        return np.array([padded(r) for r in m.T]).T
+
+
+class TestUNIQUE(unittest.TestCase):
+    def setUp(self):
+        self.sds = SystemDSContext()
+
+    def tearDown(self):
+        self.sds.close()
+
+    def test_unique_basic(self):
+        input_matrix = np.array(
+            [[1, -2, 3, 4], [0, -6, 7, 8], [0, -10, 11, -12], [0, -14, 15, -16]]
+        )
+
+        sds_input = self.sds.from_numpy(input_matrix)
+        sds_result = sds_input.unique().compute()
+        sds_result = np.sort(np.reshape(sds_result, (-1)))
+        np_result = np.unique(input_matrix)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_unique_basic2(self):
+        input_matrix = np.array(
+            [[1, 1, 1, 1], [2, 2, 2, 2], [0, 10, 11, 12], [0, 14, 15, 16]]
+        )
+
+        sds_input = self.sds.from_numpy(input_matrix)
+        sds_result = sds_input.unique(1).compute()
+        sds_result = np.sort(sds_result, 1)
+        num_cols = sds_result.shape[1]
+        np_result = compute_expected(input_matrix, num_cols, 1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_unique_basic3(self):
+        input_matrix = np.array(
+            [[0, 1, 1, 1], [0, 1, 1, 1], [0, 10, 11, 12], [0, 14, 15, 16]]
+        )
+
+        sds_input = self.sds.from_numpy(input_matrix)
+        sds_result = sds_input.unique(0).compute()
+        sds_result = np.sort(sds_result, 0)
+        num_rows = sds_result.shape[0]
+        np_result = compute_expected(input_matrix, num_rows, 0)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_unique_random1(self):
+        input_matrix = np.random.random((10, 10)) * 200
+        sds_input = self.sds.from_numpy(input_matrix)
+        sds_result = sds_input.unique().compute()
+        sds_result = np.sort(np.reshape(sds_result, (-1)))
+        np_result = np.unique(input_matrix)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_unique_random2(self):
+        input_matrix = np.random.random((10, 10)) * 200
+        sds_input = self.sds.from_numpy(input_matrix)
+        sds_result = sds_input.unique(1).compute()
+        sds_result = np.sort(sds_result, 1)
+        num_cols = sds_result.shape[1]
+        np_result = compute_expected(input_matrix, num_cols, 1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_unique_random3(self):
+        input_matrix = np.random.random((10, 10)) * 200
+        sds_input = self.sds.from_numpy(input_matrix)
+        sds_result = sds_input.unique(0).compute()
+        sds_result = np.sort(sds_result, 0)
+        num_rows = sds_result.shape[0]
+        np_result = compute_expected(input_matrix, num_rows, 0)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_unique_error(self):
+        with self.assertRaises(ValueError):
+            self.sds.from_numpy(np.array([[1, 2]])).unique(2)
+
+
+if __name__ == "__main__":
+    unittest.main()

Reply via email to