Frame

estrauss Thu, 26 Sep 2024 09:18:05 -0700

This is an automated email from the ASF dual-hosted git repository.

estrauss pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/main by this push:
     new d80e3a6d04 [SYSTEMDS-3758] Python API Builtin triu, tril, argmin, argmax and casting Scalar <-> Matrix <-> Frame
d80e3a6d04 is described below

commit d80e3a6d0493eed0043a494dbaeb875d5441a191
Author: e-strauss <[email protected]>
AuthorDate: Thu Sep 26 17:16:41 2024 +0200

    [SYSTEMDS-3758] Python API Builtin triu, tril, argmin, argmax and casting Scalar <-> Matrix <-> Frame
    
    Closes #2113
---
 src/main/python/systemds/operator/nodes/frame.py   |   8 +-
 src/main/python/systemds/operator/nodes/matrix.py  |  91 ++++++++++++++++++
 src/main/python/systemds/operator/nodes/scalar.py  |  24 ++++-
 .../python/systemds/operator/operation_node.py     |   9 ++
 src/main/python/systemds/utils/converters.py       |   2 +-
 src/main/python/tests/matrix/test_arg_min_max.py   |  95 +++++++++++++++++++
 src/main/python/tests/matrix/test_casting.py       |  76 +++++++++++++++
 src/main/python/tests/matrix/test_triangular.py    | 104 +++++++++++++++++++++
 8 files changed, 406 insertions(+), 3 deletions(-)

diff --git a/src/main/python/systemds/operator/nodes/frame.py b/src/main/python/systemds/operator/nodes/frame.py
index 2d9e231954..9c76fc5aca 100644
--- a/src/main/python/systemds/operator/nodes/frame.py
+++ b/src/main/python/systemds/operator/nodes/frame.py
@@ -44,8 +44,14 @@ if TYPE_CHECKING:
     from systemds.context import SystemDSContext
 
 
-class Frame(OperationNode):
+def to_frame(self):
+    return Frame(self.sds_context, "as.frame", [self])
+
+
+OperationNode.to_frame = to_frame
 
+
+class Frame(OperationNode):
     _pd_dataframe: pd.DataFrame
 
     def __init__(
diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py
index cddfad6d46..208e248ec6 100644
--- a/src/main/python/systemds/operator/nodes/matrix.py
+++ b/src/main/python/systemds/operator/nodes/matrix.py
@@ -41,6 +41,13 @@ from systemds.utils.helpers import (
 )
 
 
+def to_matrix(self):
+    return Matrix(self.sds_context, "as.matrix", [self])
+
+
+OperationNode.to_matrix = to_matrix
+
+
 class Matrix(OperationNode):
     _np_array: np.array
 
@@ -842,5 +849,89 @@ class Matrix(OperationNode):
 
         return ifft_node
 
+    def triu(self, include_diagonal=True, return_values=True) -> "Matrix":
+        """Selects the upper triangular part of a matrix, configurable to include the diagonal and return values or ones
+
+        :param include_diagonal: boolean, default True
+        :param return_values: boolean, default True, if set to False returns ones
+        :return: `Matrix`
+        """
+        named_input_nodes = {
+            "target": self,
+            "diag": self.sds_context.scalar(include_diagonal),
+            "values": self.sds_context.scalar(return_values),
+        }
+        return Matrix(
+            self.sds_context, "upper.tri", named_input_nodes=named_input_nodes
+        )
+
+    def tril(self, include_diagonal=True, return_values=True) -> "Matrix":
+        """Selects the lower triangular part of a matrix, configurable to include the diagonal and return values or ones
+
+        :param include_diagonal: boolean, default True
+        :param return_values: boolean, default True, if set to False returns ones
+        :return: `Matrix`
+        """
+        named_input_nodes = {
+            "target": self,
+            "diag": self.sds_context.scalar(include_diagonal),
+            "values": self.sds_context.scalar(return_values),
+        }
+        return Matrix(
+            self.sds_context, "lower.tri", named_input_nodes=named_input_nodes
+        )
+
+    def argmin(self, axis: int = None) -> "OperationNode":
+        """Return the index of the minimum if axis is None or a column vector for row-wise / column-wise minima
+        computation.
+
+        :param axis: 0 for the index of the minimum of each column, 1 for each row, None for the whole matrix
+        :return: `Matrix` representing operation for row / columns or 'Scalar' representing operation for complete
+        """
+        if axis == 0:
+            return Matrix(self.sds_context, "rowIndexMin", [self.t()])
+        elif axis == 1:
+            return Matrix(self.sds_context, "rowIndexMin", [self])
+        elif axis is None:
+            return Matrix(
+                self.sds_context,
+                "rowIndexMin",
+                [self.reshape(1, self.nCol() * self.nRow())],
+            ).to_scalar()
+        else:
+            raise ValueError(
+                f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}"
+            )
+
+    def argmax(self, axis: int = None) -> "OperationNode":
+        """Return the index of the maximum if axis is None or a column vector for row-wise / column-wise maxima
+        computation.
+
+        :param axis: 0 for the index of the maximum of each column, 1 for each row, None for the whole matrix
+        :return: `Matrix` representing operation for row / columns or 'Scalar' representing operation for complete
+        """
+        if axis == 0:
+            return Matrix(self.sds_context, "rowIndexMax", [self.t()])
+        elif axis == 1:
+            return Matrix(self.sds_context, "rowIndexMax", [self])
+        elif axis is None:
+            return Matrix(
+                self.sds_context,
+                "rowIndexMax",
+                [self.reshape(1, self.nCol() * self.nRow())],
+            ).to_scalar()
+        else:
+            raise ValueError(
+                f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}"
+            )
+
+    def reshape(self, rows, cols=1):
+        """Gives a new shape to a matrix without changing its data.
+
+        :param rows: number of rows
+        :param cols: number of columns, defaults to 1
+        :return: `Matrix` representing operation"""
+        return Matrix(self.sds_context, "matrix", [self, rows, cols])
+
     def __str__(self):
         return "MatrixNode"
diff --git a/src/main/python/systemds/operator/nodes/scalar.py b/src/main/python/systemds/operator/nodes/scalar.py
index 1d87ce5637..9224bba67b 100644
--- a/src/main/python/systemds/operator/nodes/scalar.py
+++ b/src/main/python/systemds/operator/nodes/scalar.py
@@ -32,7 +32,13 @@ from systemds.utils.consts import (
     VALID_ARITHMETIC_TYPES,
     VALID_INPUT_TYPES,
 )
-from systemds.utils.converters import numpy_to_matrix_block
+
+
+def to_scalar(self):
+    return Scalar(self.sds_context, "as.scalar", [self])
+
+
+OperationNode.to_scalar = to_scalar
 
 
 class Scalar(OperationNode):
@@ -67,6 +73,8 @@ class Scalar(OperationNode):
         named_input_vars: Dict[str, str],
     ) -> str:
         if self.__assign:
+            if type(self.operation) is bool:
+                self.operation = "TRUE" if self.operation else "FALSE"
             return f"{var_name}={self.operation};"
         else:
             return super().code_line(var_name, unnamed_input_vars, named_input_vars)
@@ -289,6 +297,20 @@ class Scalar(OperationNode):
         """
         return Scalar(self.sds_context, "toString", [self], named_input_nodes=kwargs)
 
+    def to_int(self) -> "Scalar":
+        return Scalar(
+            self.sds_context,
+            "as.integer",
+            [self],
+        )
+
+    def to_boolean(self) -> "Scalar":
+        return Scalar(
+            self.sds_context,
+            "as.logical",
+            [self],
+        )
+
     def isNA(self) -> "Scalar":
         """Computes a boolean indicator matrix of the same shape as the input, indicating where NA (not available)
         values are located. Currently, NA is only capturing NaN values.
diff --git a/src/main/python/systemds/operator/operation_node.py b/src/main/python/systemds/operator/operation_node.py
index 41c40df900..c93141fb32 100644
--- a/src/main/python/systemds/operator/operation_node.py
+++ b/src/main/python/systemds/operator/operation_node.py
@@ -202,3 +202,12 @@ class OperationNode(DAGNode):
         To get the returned string look at the stdout of SystemDSContext.
         """
         return OperationNode(self.sds_context, "print", [self], kwargs)
+
+    def to_frame(self):
+        raise NotImplementedError("should have been overwritten in frame.py")
+
+    def to_matrix(self):
+        raise NotImplementedError("should have been overwritten in matrix.py")
+
+    def to_scalar(self):
+        raise NotImplementedError("should have been overwritten in scalar.py")
diff --git a/src/main/python/systemds/utils/converters.py b/src/main/python/systemds/utils/converters.py
index 5ce3fbde57..8551b8ce6a 100644
--- a/src/main/python/systemds/utils/converters.py
+++ b/src/main/python/systemds/utils/converters.py
@@ -104,7 +104,7 @@ def pandas_to_frame_block(sds, pd_df: pd.DataFrame):
         np.dtype(np.int32): jvm.org.apache.sysds.common.Types.ValueType.INT32,
         np.dtype(np.float32): jvm.org.apache.sysds.common.Types.ValueType.FP32,
         np.dtype(np.uint8): jvm.org.apache.sysds.common.Types.ValueType.UINT8,
-        np.dtype(np.character): jvm.org.apache.sysds.common.Types.ValueType.CHARACTER,
+        np.dtype(np.str_): jvm.org.apache.sysds.common.Types.ValueType.CHARACTER,
     }
     schema = []
     col_names = []
diff --git a/src/main/python/tests/matrix/test_arg_min_max.py b/src/main/python/tests/matrix/test_arg_min_max.py
new file mode 100644
index 0000000000..602a9dfee2
--- /dev/null
+++ b/src/main/python/tests/matrix/test_arg_min_max.py
@@ -0,0 +1,95 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+
+np.random.seed(7)
+m = np.array([[1, 2, 3], [6, 5, 4], [8, 7, 9]])
+M = np.random.random_integers(9, size=300).reshape(100, 3)
+p = np.array([0.25, 0.5, 0.75])
+m2 = np.array([1, 2, 3, 4, 5])
+w2 = np.array([1, 1, 1, 1, 5])
+
+
+def weighted_quantiles(values, weights, quantiles=0.5):
+    i = np.argsort(values)
+    c = np.cumsum(weights[i])
+    return values[i[np.searchsorted(c, np.array(quantiles) * c[-1])]]
+
+
+class TestARGMINMAX(unittest.TestCase):
+    def setUp(self):
+        self.sds = SystemDSContext()
+
+    def tearDown(self):
+        self.sds.close()
+
+    def test_argmin_basic1(self):
+        sds_input = self.sds.from_numpy(m)
+        sds_result = sds_input.argmin(0).compute()
+        np_result = np.argmin(m, axis=0).reshape(-1, 1)
+        assert np.allclose(sds_result - 1, np_result, 1e-9)
+
+    def test_argmin_basic2(self):
+        sds_input = self.sds.from_numpy(m)
+        sds_result = sds_input.argmin(1).compute()
+        np_result = np.argmin(m, axis=1).reshape(-1, 1)
+        assert np.allclose(sds_result - 1, np_result, 1e-9)
+
+    def test_argmin_basic3(self):
+        sds_input = self.sds.from_numpy(m)
+        sds_result = sds_input.argmin().compute(verbose=True)
+        np_result = np.argmin(m)
+        assert np.allclose(sds_result - 1, np_result, 1e-9)
+
+    def test_argmin_basic4(self):
+        sds_input = self.sds.from_numpy(m)
+        with self.assertRaises(ValueError):
+            sds_input.argmin(3)
+
+    def test_argmax_basic1(self):
+        sds_input = self.sds.from_numpy(m)
+        sds_result = sds_input.argmax(0).compute()
+        np_result = np.argmax(m, axis=0).reshape(-1, 1)
+        assert np.allclose(sds_result - 1, np_result, 1e-9)
+
+    def test_argmax_basic2(self):
+        sds_input = self.sds.from_numpy(m)
+        sds_result = sds_input.argmax(1).compute()
+        np_result = np.argmax(m, axis=1).reshape(-1, 1)
+        assert np.allclose(sds_result - 1, np_result, 1e-9)
+
+    def test_argmax_basic3(self):
+        sds_input = self.sds.from_numpy(m)
+        sds_result = sds_input.argmax().compute()
+        np_result = np.argmax(m)
+        assert np.allclose(sds_result - 1, np_result, 1e-9)
+
+    def test_argmax_basic4(self):
+        sds_input = self.sds.from_numpy(m)
+        with self.assertRaises(ValueError):
+            sds_input.argmax(3)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/main/python/tests/matrix/test_casting.py b/src/main/python/tests/matrix/test_casting.py
new file mode 100644
index 0000000000..f990ec09d3
--- /dev/null
+++ b/src/main/python/tests/matrix/test_casting.py
@@ -0,0 +1,76 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+from pandas import DataFrame
+from numpy import ndarray
+
+
+class TestDIAG(unittest.TestCase):
+    def setUp(self):
+        self.sds = SystemDSContext()
+
+    def tearDown(self):
+        self.sds.close()
+
+    def test_casting_basic1(self):
+        sds_input = self.sds.from_numpy(np.array([[1]]))
+        sds_result = sds_input.to_scalar().compute()
+        self.assertTrue(type(sds_result) == float)
+
+    def test_casting_basic2(self):
+        sds_input = self.sds.from_numpy(np.array([[1]]))
+        sds_result = sds_input.to_frame().compute()
+        self.assertTrue(type(sds_result) == DataFrame)
+
+    def test_casting_basic3(self):
+        sds_result = self.sds.scalar(1.0).to_frame().compute()
+        self.assertTrue(type(sds_result) == DataFrame)
+
+    def test_casting_basic4(self):
+        sds_result = self.sds.scalar(1.0).to_matrix().compute()
+        self.assertTrue(type(sds_result) == ndarray)
+
+    def test_casting_basic5(self):
+        ar = ndarray((2, 2))
+        df = DataFrame(ar)
+        sds_result = self.sds.from_pandas(df).to_matrix().compute()
+        self.assertTrue(type(sds_result) == ndarray and np.allclose(ar, sds_result))
+
+    def test_casting_basic6(self):
+        ar = ndarray((1, 1))
+        df = DataFrame(ar)
+        sds_result = self.sds.from_pandas(df).to_scalar().compute()
+        self.assertTrue(type(sds_result) == float)
+
+    def test_casting_basic7(self):
+        sds_result = self.sds.scalar(1.0).to_int().compute()
+        self.assertTrue(type(sds_result) == int and sds_result)
+
+    def test_casting_basic8(self):
+        sds_result = self.sds.scalar(1.0).to_boolean().compute()
+        self.assertTrue(type(sds_result) == bool)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/main/python/tests/matrix/test_triangular.py b/src/main/python/tests/matrix/test_triangular.py
new file mode 100644
index 0000000000..f7ea2d840b
--- /dev/null
+++ b/src/main/python/tests/matrix/test_triangular.py
@@ -0,0 +1,104 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+
+m1 = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])
+
+m2 = np.random.random((10, 10))
+
+
+class TestTRIANGULAR(unittest.TestCase):
+    def setUp(self):
+        self.sds = SystemDSContext()
+
+    def tearDown(self):
+        self.sds.close()
+
+    def test_triu_basic1(self):
+        sds_input = self.sds.from_numpy(m1)
+        sds_result = sds_input.triu().compute()
+        np_result = np.triu(m1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_triu_basic2(self):
+        sds_input = self.sds.from_numpy(m1)
+        sds_result = sds_input.triu(include_diagonal=False).compute()
+        np_result = np.triu(m1, 1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_triu_basic3(self):
+        sds_input = self.sds.from_numpy(m1)
+        sds_result = sds_input.triu(return_values=False).compute()
+        np_result = np.triu(m1) > 0
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_triu_basic4(self):
+        sds_input = self.sds.from_numpy(m1)
+        sds_result = sds_input.triu(
+            return_values=False, include_diagonal=False
+        ).compute()
+        np_result = np.triu(m1, 1) > 0
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_triu_random(self):
+        sds_input = self.sds.from_numpy(m2)
+        sds_result = sds_input.triu().compute()
+        np_result = np.triu(m2)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_tril_basic1(self):
+        sds_input = self.sds.from_numpy(m1)
+        sds_result = sds_input.tril().compute()
+        np_result = np.tril(m1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_tril_basic2(self):
+        sds_input = self.sds.from_numpy(m1)
+        sds_result = sds_input.tril(include_diagonal=False).compute()
+        np_result = np.tril(m1, -1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_tril_basic3(self):
+        sds_input = self.sds.from_numpy(m1)
+        sds_result = sds_input.tril(return_values=False).compute()
+        np_result = np.tril(m1) > 0
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_tril_basic4(self):
+        sds_input = self.sds.from_numpy(m1)
+        sds_result = sds_input.tril(
+            return_values=False, include_diagonal=False
+        ).compute()
+        np_result = np.tril(m1, -1) > 0
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_tril_random(self):
+        sds_input = self.sds.from_numpy(m2)
+        sds_result = sds_input.tril().compute()
+        np_result = np.tril(m2)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+
+if __name__ == "__main__":
+    unittest.main()

(systemds) branch main updated: [SYSTEMDS-3758] Python API Builtin triu, tril, argmin, argmax and casting Scalar <-> Matrix <-> Frame

Reply via email to