This is an automated email from the ASF dual-hosted git repository.
estrauss pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new d80e3a6d04 [SYSTEMDS-3758] Python API Builtin triu, tril, argmin,
argmax and casting Scalar <-> Matrix <-> Frame
d80e3a6d04 is described below
commit d80e3a6d0493eed0043a494dbaeb875d5441a191
Author: e-strauss <[email protected]>
AuthorDate: Thu Sep 26 17:16:41 2024 +0200
[SYSTEMDS-3758] Python API Builtin triu, tril, argmin, argmax and casting
Scalar <-> Matrix <-> Frame
Closes #2113
---
src/main/python/systemds/operator/nodes/frame.py | 8 +-
src/main/python/systemds/operator/nodes/matrix.py | 91 ++++++++++++++++++
src/main/python/systemds/operator/nodes/scalar.py | 24 ++++-
.../python/systemds/operator/operation_node.py | 9 ++
src/main/python/systemds/utils/converters.py | 2 +-
src/main/python/tests/matrix/test_arg_min_max.py | 95 +++++++++++++++++++
src/main/python/tests/matrix/test_casting.py | 76 +++++++++++++++
src/main/python/tests/matrix/test_triangular.py | 104 +++++++++++++++++++++
8 files changed, 406 insertions(+), 3 deletions(-)
diff --git a/src/main/python/systemds/operator/nodes/frame.py
b/src/main/python/systemds/operator/nodes/frame.py
index 2d9e231954..9c76fc5aca 100644
--- a/src/main/python/systemds/operator/nodes/frame.py
+++ b/src/main/python/systemds/operator/nodes/frame.py
@@ -44,8 +44,14 @@ if TYPE_CHECKING:
from systemds.context import SystemDSContext
-class Frame(OperationNode):
+def to_frame(self):
+    """Cast this node to a frame.
+
+    :return: `Frame` representing an `as.frame` operation with this node as its only input
+    """
+    cast_inputs = [self]
+    return Frame(self.sds_context, "as.frame", cast_inputs)
+
+
+# Make the cast available on every OperationNode, replacing the base-class placeholder.
+OperationNode.to_frame = to_frame
+
+class Frame(OperationNode):
_pd_dataframe: pd.DataFrame
def __init__(
diff --git a/src/main/python/systemds/operator/nodes/matrix.py
b/src/main/python/systemds/operator/nodes/matrix.py
index cddfad6d46..208e248ec6 100644
--- a/src/main/python/systemds/operator/nodes/matrix.py
+++ b/src/main/python/systemds/operator/nodes/matrix.py
@@ -41,6 +41,13 @@ from systemds.utils.helpers import (
)
+def to_matrix(self):
+    """Cast this node to a matrix.
+
+    :return: `Matrix` representing an `as.matrix` operation with this node as its only input
+    """
+    cast_inputs = [self]
+    return Matrix(self.sds_context, "as.matrix", cast_inputs)
+
+
+# Make the cast available on every OperationNode, replacing the base-class placeholder.
+OperationNode.to_matrix = to_matrix
+
+
class Matrix(OperationNode):
_np_array: np.array
@@ -842,5 +849,89 @@ class Matrix(OperationNode):
return ifft_node
+    def triu(self, include_diagonal=True, return_values=True) -> "Matrix":
+        """Select the upper triangular part of this matrix.
+
+        :param include_diagonal: boolean, default True, whether the diagonal is part of the selection
+        :param return_values: boolean, default True, if set to False ones are returned
+            instead of the selected values
+        :return: `Matrix` representing the `upper.tri` operation
+        """
+        sds = self.sds_context
+        diag = sds.scalar(include_diagonal)
+        values = sds.scalar(return_values)
+        return Matrix(
+            sds,
+            "upper.tri",
+            named_input_nodes={"target": self, "diag": diag, "values": values},
+        )
+
+    def tril(self, include_diagonal=True, return_values=True) -> "Matrix":
+        """Select the lower triangular part of this matrix.
+
+        :param include_diagonal: boolean, default True, whether the diagonal is part of the selection
+        :param return_values: boolean, default True, if set to False ones are returned
+            instead of the selected values
+        :return: `Matrix` representing the `lower.tri` operation
+        """
+        sds = self.sds_context
+        diag = sds.scalar(include_diagonal)
+        values = sds.scalar(return_values)
+        return Matrix(
+            sds,
+            "lower.tri",
+            named_input_nodes={"target": self, "diag": diag, "values": values},
+        )
+
+    def argmin(self, axis: int = None) -> "OperationNode":
+        """Return the index of the minimum, for the whole matrix or along one axis.
+
+        :param axis: 0 to get, for every column, the row index of its minimum; 1 to get,
+            for every row, the column index of its minimum; None (default) to get the
+            index of the minimum after flattening the matrix into a single row
+        :return: `Matrix` representing the operation if axis is 0 or 1, `Scalar`
+            representing the operation if axis is None
+        :raises ValueError: if axis is not 0, 1 or None
+        """
+        if axis == 0:
+            # rowIndexMin works per row, so transpose to get one result per column.
+            return Matrix(self.sds_context, "rowIndexMin", [self.t()])
+        if axis == 1:
+            return Matrix(self.sds_context, "rowIndexMin", [self])
+        if axis is None:
+            # Flatten into one row so a single rowIndexMin call covers every cell.
+            flat = self.reshape(1, self.nCol() * self.nRow())
+            return Matrix(self.sds_context, "rowIndexMin", [flat]).to_scalar()
+        # Name the operation and the offending value; the previous message printed
+        # the operation of the input node instead.
+        raise ValueError(
+            "Axis has to be either 0, 1 or None, for column, row or complete argmin, "
+            f"got {axis!r}"
+        )
+
+    def argmax(self, axis: int = None) -> "OperationNode":
+        """Return the index of the maximum, for the whole matrix or along one axis.
+
+        :param axis: 0 to get, for every column, the row index of its maximum; 1 to get,
+            for every row, the column index of its maximum; None (default) to get the
+            index of the maximum after flattening the matrix into a single row
+        :return: `Matrix` representing the operation if axis is 0 or 1, `Scalar`
+            representing the operation if axis is None
+        :raises ValueError: if axis is not 0, 1 or None
+        """
+        if axis == 0:
+            # rowIndexMax works per row, so transpose to get one result per column.
+            return Matrix(self.sds_context, "rowIndexMax", [self.t()])
+        if axis == 1:
+            return Matrix(self.sds_context, "rowIndexMax", [self])
+        if axis is None:
+            # Flatten into one row so a single rowIndexMax call covers every cell.
+            flat = self.reshape(1, self.nCol() * self.nRow())
+            return Matrix(self.sds_context, "rowIndexMax", [flat]).to_scalar()
+        # Name the operation and the offending value; the previous message printed
+        # the operation of the input node instead.
+        raise ValueError(
+            "Axis has to be either 0, 1 or None, for column, row or complete argmax, "
+            f"got {axis!r}"
+        )
+
+    def reshape(self, rows, cols=1):
+        """Give this matrix a new shape without changing its data.
+
+        :param rows: number of rows of the result
+        :param cols: number of columns of the result, defaults to 1
+        :return: `Matrix` representing the `matrix` operation
+        """
+        reshape_inputs = [self, rows, cols]
+        return Matrix(self.sds_context, "matrix", reshape_inputs)
+
def __str__(self):
return "MatrixNode"
diff --git a/src/main/python/systemds/operator/nodes/scalar.py
b/src/main/python/systemds/operator/nodes/scalar.py
index 1d87ce5637..9224bba67b 100644
--- a/src/main/python/systemds/operator/nodes/scalar.py
+++ b/src/main/python/systemds/operator/nodes/scalar.py
@@ -32,7 +32,13 @@ from systemds.utils.consts import (
VALID_ARITHMETIC_TYPES,
VALID_INPUT_TYPES,
)
-from systemds.utils.converters import numpy_to_matrix_block
+
+
+def to_scalar(self):
+    """Cast this node to a scalar.
+
+    :return: `Scalar` representing an `as.scalar` operation with this node as its only input
+    """
+    cast_inputs = [self]
+    return Scalar(self.sds_context, "as.scalar", cast_inputs)
+
+
+# Make the cast available on every OperationNode, replacing the base-class placeholder.
+OperationNode.to_scalar = to_scalar
class Scalar(OperationNode):
@@ -67,6 +73,8 @@ class Scalar(OperationNode):
named_input_vars: Dict[str, str],
) -> str:
if self.__assign:
+ if type(self.operation) is bool:
+ self.operation = "TRUE" if self.operation else "FALSE"
return f"{var_name}={self.operation};"
else:
return super().code_line(var_name, unnamed_input_vars,
named_input_vars)
@@ -289,6 +297,20 @@ class Scalar(OperationNode):
"""
return Scalar(self.sds_context, "toString", [self],
named_input_nodes=kwargs)
+    def to_int(self) -> "Scalar":
+        """Cast this scalar to an integer.
+
+        :return: `Scalar` representing the `as.integer` operation
+        """
+        return Scalar(self.sds_context, "as.integer", [self])
+
+    def to_boolean(self) -> "Scalar":
+        """Cast this scalar to a boolean.
+
+        :return: `Scalar` representing the `as.logical` operation
+        """
+        return Scalar(self.sds_context, "as.logical", [self])
+
def isNA(self) -> "Scalar":
"""Computes a boolean indicator matrix of the same shape as the input,
indicating where NA (not available)
values are located. Currently, NA is only capturing NaN values.
diff --git a/src/main/python/systemds/operator/operation_node.py
b/src/main/python/systemds/operator/operation_node.py
index 41c40df900..c93141fb32 100644
--- a/src/main/python/systemds/operator/operation_node.py
+++ b/src/main/python/systemds/operator/operation_node.py
@@ -202,3 +202,12 @@ class OperationNode(DAGNode):
To get the returned string look at the stdout of SystemDSContext.
"""
return OperationNode(self.sds_context, "print", [self], kwargs)
+
+ def to_frame(self):
+ """Cast this node to a frame.
+
+ Placeholder only: frame.py is expected to replace this method.
+
+ :raises NotImplementedError: if the placeholder was not replaced
+ """
+ raise NotImplementedError("should have been overwritten in frame.py")
+
+ def to_matrix(self):
+ """Cast this node to a matrix.
+
+ Placeholder only: matrix.py is expected to replace this method.
+
+ :raises NotImplementedError: if the placeholder was not replaced
+ """
+ raise NotImplementedError("should have been overwritten in matrix.py")
+
+ def to_scalar(self):
+ """Cast this node to a scalar.
+
+ Placeholder only: scalar.py is expected to replace this method.
+
+ :raises NotImplementedError: if the placeholder was not replaced
+ """
+ raise NotImplementedError("should have been overwritten in scalar.py")
diff --git a/src/main/python/systemds/utils/converters.py
b/src/main/python/systemds/utils/converters.py
index 5ce3fbde57..8551b8ce6a 100644
--- a/src/main/python/systemds/utils/converters.py
+++ b/src/main/python/systemds/utils/converters.py
@@ -104,7 +104,7 @@ def pandas_to_frame_block(sds, pd_df: pd.DataFrame):
np.dtype(np.int32): jvm.org.apache.sysds.common.Types.ValueType.INT32,
np.dtype(np.float32): jvm.org.apache.sysds.common.Types.ValueType.FP32,
np.dtype(np.uint8): jvm.org.apache.sysds.common.Types.ValueType.UINT8,
- np.dtype(np.character):
jvm.org.apache.sysds.common.Types.ValueType.CHARACTER,
+ np.dtype(np.str_):
jvm.org.apache.sysds.common.Types.ValueType.CHARACTER,
}
schema = []
col_names = []
diff --git a/src/main/python/tests/matrix/test_arg_min_max.py
b/src/main/python/tests/matrix/test_arg_min_max.py
new file mode 100644
index 0000000000..602a9dfee2
--- /dev/null
+++ b/src/main/python/tests/matrix/test_arg_min_max.py
@@ -0,0 +1,95 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+
+np.random.seed(7)
+m = np.array([[1, 2, 3], [6, 5, 4], [8, 7, 9]])
+# randint excludes its upper bound, so (1, 10) draws from 1..9 exactly like the
+# deprecated random_integers(9) did.
+M = np.random.randint(1, 10, size=300).reshape(100, 3)
+p = np.array([0.25, 0.5, 0.75])
+m2 = np.array([1, 2, 3, 4, 5])
+w2 = np.array([1, 1, 1, 1, 5])
+
+
+def weighted_quantiles(values, weights, quantiles=0.5):
+    """Pick the sample values found at the given weighted quantiles.
+
+    :param values: numpy array of sample values
+    :param weights: numpy array holding one weight per value
+    :param quantiles: a quantile or a sequence of quantiles, defaults to 0.5
+    :return: for each quantile, the value at which the cumulative weight of the
+        sorted sample first reaches that share of the total weight
+    """
+    order = np.argsort(values)
+    cumulative = np.cumsum(weights[order])
+    targets = np.array(quantiles) * cumulative[-1]
+    positions = np.searchsorted(cumulative, targets)
+    return values[order[positions]]
+
+
+class TestARGMINMAX(unittest.TestCase):
+    """Compare Matrix.argmin and Matrix.argmax with numpy on the 3x3 matrix ``m``."""
+
+    def setUp(self):
+        self.sds = SystemDSContext()
+
+    def tearDown(self):
+        self.sds.close()
+
+    def assert_index_matches(self, sds_result, np_result):
+        """Assert that a SystemDS index result equals the numpy one.
+
+        One is subtracted from the SystemDS result before comparing, as the
+        expected numpy indices start at 0.
+        """
+        # unittest assertion instead of a bare assert, which `python -O` removes.
+        self.assertTrue(np.allclose(sds_result - 1, np_result, 1e-9))
+
+    def test_argmin_basic1(self):
+        sds_result = self.sds.from_numpy(m).argmin(0).compute()
+        self.assert_index_matches(sds_result, np.argmin(m, axis=0).reshape(-1, 1))
+
+    def test_argmin_basic2(self):
+        sds_result = self.sds.from_numpy(m).argmin(1).compute()
+        self.assert_index_matches(sds_result, np.argmin(m, axis=1).reshape(-1, 1))
+
+    def test_argmin_basic3(self):
+        # No axis: a single index for the whole matrix.
+        sds_result = self.sds.from_numpy(m).argmin().compute()
+        self.assert_index_matches(sds_result, np.argmin(m))
+
+    def test_argmin_basic4(self):
+        sds_input = self.sds.from_numpy(m)
+        with self.assertRaises(ValueError):
+            sds_input.argmin(3)
+
+    def test_argmax_basic1(self):
+        sds_result = self.sds.from_numpy(m).argmax(0).compute()
+        self.assert_index_matches(sds_result, np.argmax(m, axis=0).reshape(-1, 1))
+
+    def test_argmax_basic2(self):
+        sds_result = self.sds.from_numpy(m).argmax(1).compute()
+        self.assert_index_matches(sds_result, np.argmax(m, axis=1).reshape(-1, 1))
+
+    def test_argmax_basic3(self):
+        # No axis: a single index for the whole matrix.
+        sds_result = self.sds.from_numpy(m).argmax().compute()
+        self.assert_index_matches(sds_result, np.argmax(m))
+
+    def test_argmax_basic4(self):
+        sds_input = self.sds.from_numpy(m)
+        with self.assertRaises(ValueError):
+            sds_input.argmax(3)
+
+
+# Run the tests of this module when the file is executed directly.
+if __name__ == "__main__":
+ unittest.main()
diff --git a/src/main/python/tests/matrix/test_casting.py
b/src/main/python/tests/matrix/test_casting.py
new file mode 100644
index 0000000000..f990ec09d3
--- /dev/null
+++ b/src/main/python/tests/matrix/test_casting.py
@@ -0,0 +1,76 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+from pandas import DataFrame
+from numpy import ndarray
+
+
+class TestDIAG(unittest.TestCase):
+    """Check the Python types returned after casting between Scalar, Matrix and Frame.
+
+    The class name does not mention casting; it is left unchanged so existing
+    test IDs keep working.
+    """
+
+    def setUp(self):
+        self.sds = SystemDSContext()
+
+    def tearDown(self):
+        self.sds.close()
+
+    def test_casting_basic1(self):
+        sds_input = self.sds.from_numpy(np.array([[1]]))
+        sds_result = sds_input.to_scalar().compute()
+        self.assertIs(type(sds_result), float)
+
+    def test_casting_basic2(self):
+        sds_input = self.sds.from_numpy(np.array([[1]]))
+        sds_result = sds_input.to_frame().compute()
+        self.assertIs(type(sds_result), DataFrame)
+
+    def test_casting_basic3(self):
+        sds_result = self.sds.scalar(1.0).to_frame().compute()
+        self.assertIs(type(sds_result), DataFrame)
+
+    def test_casting_basic4(self):
+        sds_result = self.sds.scalar(1.0).to_matrix().compute()
+        self.assertIs(type(sds_result), ndarray)
+
+    def test_casting_basic5(self):
+        # Explicit values: ndarray((2, 2)) is uninitialised memory and may hold
+        # NaN, which makes np.allclose fail at random.
+        ar = np.array([[1.0, 2.0], [3.0, 4.0]])
+        df = DataFrame(ar)
+        sds_result = self.sds.from_pandas(df).to_matrix().compute()
+        self.assertIs(type(sds_result), ndarray)
+        self.assertTrue(np.allclose(ar, sds_result))
+
+    def test_casting_basic6(self):
+        # Explicit value for the same reason as in test_casting_basic5.
+        ar = np.array([[1.0]])
+        df = DataFrame(ar)
+        sds_result = self.sds.from_pandas(df).to_scalar().compute()
+        self.assertIs(type(sds_result), float)
+
+    def test_casting_basic7(self):
+        sds_result = self.sds.scalar(1.0).to_int().compute()
+        # Exact type check: bool is a subclass of int and must not pass here.
+        self.assertIs(type(sds_result), int)
+        self.assertEqual(sds_result, 1)
+
+    def test_casting_basic8(self):
+        sds_result = self.sds.scalar(1.0).to_boolean().compute()
+        self.assertIs(type(sds_result), bool)
+
+
+# Run the tests of this module when the file is executed directly.
+if __name__ == "__main__":
+ unittest.main()
diff --git a/src/main/python/tests/matrix/test_triangular.py
b/src/main/python/tests/matrix/test_triangular.py
new file mode 100644
index 0000000000..f7ea2d840b
--- /dev/null
+++ b/src/main/python/tests/matrix/test_triangular.py
@@ -0,0 +1,104 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+
+m1 = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])
+
+# Seeded local generator: the matrix is the same on every run, so a failure can
+# be reproduced, and numpy's global random state is left untouched.
+m2 = np.random.default_rng(7).random((10, 10))
+
+
+class TestTRIANGULAR(unittest.TestCase):
+    """Compare Matrix.triu and Matrix.tril with numpy's triu / tril."""
+
+    def setUp(self):
+        self.sds = SystemDSContext()
+
+    def tearDown(self):
+        self.sds.close()
+
+    def assert_close(self, sds_result, np_result):
+        """Assert that the SystemDS result is element-wise close to the numpy result."""
+        # unittest assertion instead of a bare assert, which `python -O` removes.
+        self.assertTrue(np.allclose(sds_result, np_result, 1e-9))
+
+    def test_triu_basic1(self):
+        sds_result = self.sds.from_numpy(m1).triu().compute()
+        self.assert_close(sds_result, np.triu(m1))
+
+    def test_triu_basic2(self):
+        sds_result = self.sds.from_numpy(m1).triu(include_diagonal=False).compute()
+        self.assert_close(sds_result, np.triu(m1, 1))
+
+    def test_triu_basic3(self):
+        sds_result = self.sds.from_numpy(m1).triu(return_values=False).compute()
+        # m1 is strictly positive, so "> 0" marks exactly the selected cells.
+        self.assert_close(sds_result, np.triu(m1) > 0)
+
+    def test_triu_basic4(self):
+        sds_result = (
+            self.sds.from_numpy(m1)
+            .triu(return_values=False, include_diagonal=False)
+            .compute()
+        )
+        self.assert_close(sds_result, np.triu(m1, 1) > 0)
+
+    def test_triu_random(self):
+        sds_result = self.sds.from_numpy(m2).triu().compute()
+        self.assert_close(sds_result, np.triu(m2))
+
+    def test_tril_basic1(self):
+        sds_result = self.sds.from_numpy(m1).tril().compute()
+        self.assert_close(sds_result, np.tril(m1))
+
+    def test_tril_basic2(self):
+        sds_result = self.sds.from_numpy(m1).tril(include_diagonal=False).compute()
+        self.assert_close(sds_result, np.tril(m1, -1))
+
+    def test_tril_basic3(self):
+        sds_result = self.sds.from_numpy(m1).tril(return_values=False).compute()
+        # m1 is strictly positive, so "> 0" marks exactly the selected cells.
+        self.assert_close(sds_result, np.tril(m1) > 0)
+
+    def test_tril_basic4(self):
+        sds_result = (
+            self.sds.from_numpy(m1)
+            .tril(return_values=False, include_diagonal=False)
+            .compute()
+        )
+        self.assert_close(sds_result, np.tril(m1, -1) > 0)
+
+    def test_tril_random(self):
+        sds_result = self.sds.from_numpy(m2).tril().compute()
+        self.assert_close(sds_result, np.tril(m2))
+
+
+# Run the tests of this module when the file is executed directly.
+if __name__ == "__main__":
+ unittest.main()