This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 06ccfd629d [SYSTEMDS-3749] Python API missing builtin sd
06ccfd629d is described below
commit 06ccfd629ddaf084ae35aa095c3120dbceef68a3
Author: e-strauss <[email protected]>
AuthorDate: Tue Sep 3 22:20:45 2024 +0200
[SYSTEMDS-3749] Python API missing builtin sd
Also included in this commit is a reformatting of the Matrix
Operation node.
Closes #2092
---
src/main/python/systemds/operator/nodes/matrix.py | 515 +++++++++++++---------
src/main/python/tests/matrix/test_aggregations.py | 224 +++++++---
2 files changed, 455 insertions(+), 284 deletions(-)
diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py
index 3f02daa343..fafb815ca4 100644
--- a/src/main/python/systemds/operator/nodes/matrix.py
+++ b/src/main/python/systemds/operator/nodes/matrix.py
@@ -29,21 +29,31 @@ from systemds.operator.operation_node import OperationNode
from systemds.operator.nodes.multi_return import MultiReturn
from systemds.operator.nodes.scalar import Scalar
from systemds.script_building.dag import OutputType
-from systemds.utils.consts import (BINARY_OPERATIONS, VALID_ARITHMETIC_TYPES,
- VALID_INPUT_TYPES)
-from systemds.utils.converters import (matrix_block_to_numpy,
- numpy_to_matrix_block)
-from systemds.utils.helpers import check_is_empty_slice,
check_no_less_than_zero, get_slice_string
+from systemds.utils.consts import (
+ BINARY_OPERATIONS,
+ VALID_ARITHMETIC_TYPES,
+ VALID_INPUT_TYPES,
+)
+from systemds.utils.converters import matrix_block_to_numpy,
numpy_to_matrix_block
+from systemds.utils.helpers import (
+ check_is_empty_slice,
+ check_no_less_than_zero,
+ get_slice_string,
+)
class Matrix(OperationNode):
_np_array: np.array
- def __init__(self, sds_context, operation: str,
- unnamed_input_nodes: Union[str,
- Iterable[VALID_INPUT_TYPES]] =
None,
- named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None,
- local_data: np.array = None, brackets: bool = False) ->
'Matrix':
+ def __init__(
+ self,
+ sds_context,
+ operation: str,
+ unnamed_input_nodes: Union[str, Iterable[VALID_INPUT_TYPES]] = None,
+ named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None,
+ local_data: np.array = None,
+ brackets: bool = False,
+ ) -> "Matrix":
is_python_local_data = False
if local_data is not None:
@@ -52,17 +62,33 @@ class Matrix(OperationNode):
else:
self._np_array = None
- super().__init__(sds_context, operation, unnamed_input_nodes,
- named_input_nodes, OutputType.MATRIX,
is_python_local_data, brackets)
-
- def pass_python_data_to_prepared_script(self, sds, var_name: str,
prepared_script: JavaObject) -> None:
- assert self.is_python_local_data, 'Can only pass data to prepared
script if it is python local!'
+ super().__init__(
+ sds_context,
+ operation,
+ unnamed_input_nodes,
+ named_input_nodes,
+ OutputType.MATRIX,
+ is_python_local_data,
+ brackets,
+ )
+
+ def pass_python_data_to_prepared_script(
+ self, sds, var_name: str, prepared_script: JavaObject
+ ) -> None:
+ assert (
+ self.is_python_local_data
+ ), "Can only pass data to prepared script if it is python local!"
if self._is_numpy():
- prepared_script.setMatrix(var_name, numpy_to_matrix_block(
- sds, self._np_array), True) # True for reuse
-
- def code_line(self, var_name: str, unnamed_input_vars: Sequence[str],
- named_input_vars: Dict[str, str]) -> str:
+ prepared_script.setMatrix(
+ var_name, numpy_to_matrix_block(sds, self._np_array), True
+ ) # True for reuse
+
+ def code_line(
+ self,
+ var_name: str,
+ unnamed_input_vars: Sequence[str],
+ named_input_vars: Dict[str, str],
+ ) -> str:
code_line = super().code_line(var_name, unnamed_input_vars,
named_input_vars)
if self._is_numpy():
code_line = code_line.format(file_name=var_name)
@@ -70,94 +96,96 @@ class Matrix(OperationNode):
def compute(self, verbose: bool = False, lineage: bool = False) ->
np.array:
if self._is_numpy():
- self.sds_context._log.info('Numpy Array - No Compilation
necessary')
+ self.sds_context._log.info("Numpy Array - No Compilation
necessary")
return self._np_array
else:
return super().compute(verbose, lineage)
def _parse_output_result_variables(self, result_variables):
- return matrix_block_to_numpy(self.sds_context.java_gateway.jvm,
-
result_variables.getMatrixBlock(self._script.out_var_name[0]))
+ return matrix_block_to_numpy(
+ self.sds_context.java_gateway.jvm,
+ result_variables.getMatrixBlock(self._script.out_var_name[0]),
+ )
def _is_numpy(self) -> bool:
return self._np_array is not None
- def __add__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '+', [self, other])
+ def __add__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "+", [self, other])
# Left hand side
- def __radd__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '+', [other, self])
+ def __radd__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "+", [other, self])
- def __sub__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '-', [self, other])
+ def __sub__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "-", [self, other])
# Left hand side
- def __rsub__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '-', [other, self])
+ def __rsub__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "-", [other, self])
- def __mul__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '*', [self, other])
+ def __mul__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "*", [self, other])
- def __rmul__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '*', [other, self])
+ def __rmul__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "*", [other, self])
- def __truediv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '/', [self, other])
+ def __truediv__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "/", [self, other])
- def __rtruediv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '/', [other, self])
+ def __rtruediv__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "/", [other, self])
- def __floordiv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '//', [self, other])
+ def __floordiv__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "//", [self, other])
- def __rfloordiv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix':
- return Matrix(self.sds_context, '//', [other, self])
+ def __rfloordiv__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix":
+ return Matrix(self.sds_context, "//", [other, self])
- def __lt__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '<', [self, other])
+ def __lt__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, "<", [self, other])
- def __rlt__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '<', [other, self])
+ def __rlt__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, "<", [other, self])
- def __le__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '<=', [self, other])
+ def __le__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, "<=", [self, other])
- def __rle__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '<=', [other, self])
+ def __rle__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, "<=", [other, self])
- def __gt__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '>', [self, other])
+ def __gt__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, ">", [self, other])
- def __rgt__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '>', [other, self])
+ def __rgt__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, ">", [other, self])
- def __ge__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '>=', [self, other])
+ def __ge__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, ">=", [self, other])
- def __rge__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '>=', [other, self])
+ def __rge__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, ">=", [other, self])
- def __eq__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '==', [self, other])
+ def __eq__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, "==", [self, other])
- def __req__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '==', [other, self])
+ def __req__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, "==", [other, self])
- def __ne__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '!=', [self, other])
+ def __ne__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, "!=", [self, other])
- def __rne__(self, other) -> 'Matrix':
- return Matrix(self.sds_context, '!=', [other, self])
+ def __rne__(self, other) -> "Matrix":
+ return Matrix(self.sds_context, "!=", [other, self])
- def __matmul__(self, other: 'Matrix') -> 'Matrix':
- return Matrix(self.sds_context, '%*%', [self, other])
+ def __matmul__(self, other: "Matrix") -> "Matrix":
+ return Matrix(self.sds_context, "%*%", [self, other])
- def nRow(self) -> 'Scalar':
- return Scalar(self.sds_context, 'nrow', [self])
+ def nRow(self) -> "Scalar":
+ return Scalar(self.sds_context, "nrow", [self])
- def nCol(self) -> 'Scalar':
- return Scalar(self.sds_context, 'ncol', [self])
+ def nCol(self) -> "Scalar":
+ return Scalar(self.sds_context, "ncol", [self])
def __getitem__(self, i):
if isinstance(i, tuple) and len(i) > 2:
@@ -165,138 +193,154 @@ class Matrix(OperationNode):
elif isinstance(i, list):
check_no_less_than_zero(i)
slice = self.sds_context.from_numpy(np.array(i)) + 1
- select = Matrix(self.sds_context, "table",
- [slice, 1, self.nRow(), 1])
- ret = Matrix(self.sds_context, "removeEmpty", [], {
- 'target': self, 'margin': '"rows"', 'select': select})
+ select = Matrix(self.sds_context, "table", [slice, 1, self.nRow(),
1])
+ ret = Matrix(
+ self.sds_context,
+ "removeEmpty",
+ [],
+ {"target": self, "margin": '"rows"', "select": select},
+ )
return ret
elif isinstance(i, tuple) and isinstance(i[0], list) and
isinstance(i[1], list):
raise NotImplementedError("double slicing is not supported yet")
- elif isinstance(i, tuple) and check_is_empty_slice(i[0]) and
isinstance(i[1], list):
+ elif (
+ isinstance(i, tuple)
+ and check_is_empty_slice(i[0])
+ and isinstance(i[1], list)
+ ):
check_no_less_than_zero(i[1])
slice = self.sds_context.from_numpy(np.array(i[1])) + 1
- select = Matrix(self.sds_context, "table",
- [slice, 1, self.nCol(), 1])
- ret = Matrix(self.sds_context, "removeEmpty", [], {
- 'target': self, 'margin': '"cols"', 'select': select})
+ select = Matrix(self.sds_context, "table", [slice, 1, self.nCol(),
1])
+ ret = Matrix(
+ self.sds_context,
+ "removeEmpty",
+ [],
+ {"target": self, "margin": '"cols"', "select": select},
+ )
return ret
else:
sliceIns = get_slice_string(i)
- return Matrix(self.sds_context, '', [self, sliceIns],
brackets=True)
+ return Matrix(self.sds_context, "", [self, sliceIns],
brackets=True)
- def sum(self, axis: int = None) -> 'OperationNode':
+ def sum(self, axis: int = None) -> "OperationNode":
"""Calculate sum of matrix.
:param axis: can be 0 or 1 to do either row or column sums
:return: `Matrix` representing operation
"""
if axis == 0:
- return Matrix(self.sds_context, 'colSums', [self])
+ return Matrix(self.sds_context, "colSums", [self])
elif axis == 1:
- return Matrix(self.sds_context, 'rowSums', [self])
+ return Matrix(self.sds_context, "rowSums", [self])
elif axis is None:
- return Scalar(self.sds_context, 'sum', [self])
+ return Scalar(self.sds_context, "sum", [self])
raise ValueError(
- f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}")
+ f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}"
+ )
- def mean(self, axis: int = None) -> 'OperationNode':
+ def mean(self, axis: int = None) -> "OperationNode":
"""Calculate mean of matrix.
:param axis: can be 0 or 1 to do either row or column means
:return: `Matrix` representing operation
"""
if axis == 0:
- return Matrix(self.sds_context, 'colMeans', [self])
+ return Matrix(self.sds_context, "colMeans", [self])
elif axis == 1:
- return Matrix(self.sds_context, 'rowMeans', [self])
+ return Matrix(self.sds_context, "rowMeans", [self])
elif axis is None:
- return Scalar(self.sds_context, 'mean', [self])
+ return Scalar(self.sds_context, "mean", [self])
raise ValueError(
- f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}")
+ f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}"
+ )
- def max(self, axis: int = None) -> 'OperationNode':
+ def max(self, axis: int = None) -> "OperationNode":
"""Calculate max of matrix.
:param axis: can be 0 or 1 to do either row or column aggregation
:return: `Matrix` representing operation
"""
if axis == 0:
- return Matrix(self.sds_context, 'colMaxs', [self])
+ return Matrix(self.sds_context, "colMaxs", [self])
elif axis == 1:
- return Matrix(self.sds_context, 'rowMaxs', [self])
+ return Matrix(self.sds_context, "rowMaxs", [self])
elif axis is None:
- return Scalar(self.sds_context, 'max', [self])
+ return Scalar(self.sds_context, "max", [self])
raise ValueError(
- f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}")
+ f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}"
+ )
- def min(self, axis: int = None) -> 'OperationNode':
+ def min(self, axis: int = None) -> "OperationNode":
"""Calculate max of matrix.
:param axis: can be 0 or 1 to do either row or column aggregation
:return: `Matrix` representing operation
"""
if axis == 0:
- return Matrix(self.sds_context, 'colMins', [self])
+ return Matrix(self.sds_context, "colMins", [self])
elif axis == 1:
- return Matrix(self.sds_context, 'rowMins', [self])
+ return Matrix(self.sds_context, "rowMins", [self])
elif axis is None:
- return Scalar(self.sds_context, 'min', [self])
+ return Scalar(self.sds_context, "min", [self])
raise ValueError(
- f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}")
+ f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}"
+ )
- def countDistinct(self, axis: int = None) -> 'OperationNode':
+ def countDistinct(self, axis: int = None) -> "OperationNode":
"""Calculate the number of distinct values of matrix.
:param axis: can be 0 or 1 to do either row or column aggregation
:return: `Matrix` representing operation
"""
if axis == 0:
- return Matrix(self.sds_context, 'colCountDistinct', [self])
+ return Matrix(self.sds_context, "colCountDistinct", [self])
elif axis == 1:
- return Matrix(self.sds_context, 'rowCountDistinct', [self])
+ return Matrix(self.sds_context, "rowCountDistinct", [self])
elif axis is None:
- return Scalar(self.sds_context, 'countDistinct', [self])
+ return Scalar(self.sds_context, "countDistinct", [self])
raise ValueError(
- f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}")
+ f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}"
+ )
-
- def countDistinctApprox(self, axis: int = None) -> 'OperationNode':
+ def countDistinctApprox(self, axis: int = None) -> "OperationNode":
"""Calculate the approximate number of distinct values of matrix.
:param axis: can be 0 or 1 to do either row or column aggregation
:return: `Matrix` representing operation
"""
if axis == 0:
- return Matrix(self.sds_context, 'colCountDistinctApprox', [self])
+ return Matrix(self.sds_context, "colCountDistinctApprox", [self])
elif axis == 1:
- return Matrix(self.sds_context, 'rowCountDistinctApprox', [self])
+ return Matrix(self.sds_context, "rowCountDistinctApprox", [self])
elif axis is None:
- return Scalar(self.sds_context, 'countDistinctApprox', [self])
+ return Scalar(self.sds_context, "countDistinctApprox", [self])
raise ValueError(
- f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}")
+ f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}"
+ )
- def var(self, axis: int = None) -> 'OperationNode':
+ def var(self, axis: int = None) -> "OperationNode":
"""Calculate variance of matrix.
:param axis: can be 0 or 1 to do either row or column vars
:return: `Matrix` representing operation
"""
if axis == 0:
- return Matrix(self.sds_context, 'colVars', [self])
+ return Matrix(self.sds_context, "colVars", [self])
elif axis == 1:
- return Matrix(self.sds_context, 'rowVars', [self])
+ return Matrix(self.sds_context, "rowVars", [self])
elif axis is None:
- return Scalar(self.sds_context, 'var', [self])
+ return Scalar(self.sds_context, "var", [self])
raise ValueError(
- f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}")
+ f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}"
+ )
- def trace(self) -> 'Scalar':
+ def trace(self) -> "Scalar":
"""Calculate trace.
:return: `Matrix` representing operation
"""
- return Scalar(self.sds_context, 'trace', [self])
+ return Scalar(self.sds_context, "trace", [self])
- def unique(self, axis: int = None) -> 'Matrix':
+ def unique(self, axis: int = None) -> "Matrix":
"""Returns the unique values for the complete matrix, for each row or
for each column.
:param axis: can be 0 or 1 to do either row or column uniques
@@ -304,156 +348,170 @@ class Matrix(OperationNode):
"""
if axis == 0:
named_input_nodes = {"dir": '"c"'}
- return Matrix(self.sds_context, 'unique', [self],
named_input_nodes=named_input_nodes)
+ return Matrix(
+ self.sds_context, "unique", [self],
named_input_nodes=named_input_nodes
+ )
elif axis == 1:
named_input_nodes = {"dir": '"r"'}
- return Matrix(self.sds_context, 'unique', [self],
named_input_nodes=named_input_nodes)
+ return Matrix(
+ self.sds_context, "unique", [self],
named_input_nodes=named_input_nodes
+ )
elif axis is None:
- return Matrix(self.sds_context, 'unique', [self])
+ return Matrix(self.sds_context, "unique", [self])
raise ValueError(
- f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}")
+ f"Axis has to be either 0, 1 or None, for column, row or complete
{self.operation}"
+ )
+
+ def sd(self) -> "Scalar":
+ """Calculate standard deviation of matrix.
- def abs(self) -> 'Matrix':
+ :return: `Matrix` representing operation
+ """
+ return Scalar(self.sds_context, "sd", [self])
+
+ def abs(self) -> "Matrix":
"""Calculate absolute.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'abs', [self])
+ return Matrix(self.sds_context, "abs", [self])
- def sqrt(self) -> 'Matrix':
+ def sqrt(self) -> "Matrix":
"""Calculate square root.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'sqrt', [self])
+ return Matrix(self.sds_context, "sqrt", [self])
- def exp(self) -> 'Matrix':
+ def exp(self) -> "Matrix":
"""Calculate exponential.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'exp', [self])
+ return Matrix(self.sds_context, "exp", [self])
- def floor(self) -> 'Matrix':
+ def floor(self) -> "Matrix":
"""Return the floor of the input, element-wise.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'floor', [self])
+ return Matrix(self.sds_context, "floor", [self])
- def ceil(self) -> 'Matrix':
+ def ceil(self) -> "Matrix":
"""Return the ceiling of the input, element-wise.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'ceil', [self])
+ return Matrix(self.sds_context, "ceil", [self])
- def log(self) -> 'Matrix':
+ def log(self) -> "Matrix":
"""Calculate logarithm.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'log', [self])
+ return Matrix(self.sds_context, "log", [self])
- def sign(self) -> 'Matrix':
+ def sign(self) -> "Matrix":
"""Returns a matrix representing the signs of the input matrix
elements,
where 1 represents positive, 0 represents zero, and -1 represents
negative.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'sign', [self])
+ return Matrix(self.sds_context, "sign", [self])
- def sin(self) -> 'Matrix':
+ def sin(self) -> "Matrix":
"""Calculate sin.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'sin', [self])
+ return Matrix(self.sds_context, "sin", [self])
- def cos(self) -> 'Matrix':
+ def cos(self) -> "Matrix":
"""Calculate cos.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'cos', [self])
+ return Matrix(self.sds_context, "cos", [self])
- def tan(self) -> 'Matrix':
+ def tan(self) -> "Matrix":
"""Calculate tan.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'tan', [self])
+ return Matrix(self.sds_context, "tan", [self])
- def asin(self) -> 'Matrix':
+ def asin(self) -> "Matrix":
"""Calculate arcsin.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'asin', [self])
+ return Matrix(self.sds_context, "asin", [self])
- def acos(self) -> 'Matrix':
+ def acos(self) -> "Matrix":
"""Calculate arccos.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'acos', [self])
+ return Matrix(self.sds_context, "acos", [self])
- def atan(self) -> 'Matrix':
+ def atan(self) -> "Matrix":
"""Calculate arctan.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'atan', [self])
+ return Matrix(self.sds_context, "atan", [self])
- def sinh(self) -> 'Matrix':
+ def sinh(self) -> "Matrix":
"""Calculate sin.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'sinh', [self])
+ return Matrix(self.sds_context, "sinh", [self])
- def cosh(self) -> 'Matrix':
+ def cosh(self) -> "Matrix":
"""Calculate cos.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'cosh', [self])
+ return Matrix(self.sds_context, "cosh", [self])
- def tanh(self) -> 'Matrix':
+ def tanh(self) -> "Matrix":
"""Calculate tan.
:return: `Matrix` representing operation
"""
- return Matrix(self.sds_context, 'tanh', [self])
+ return Matrix(self.sds_context, "tanh", [self])
- def moment(self, moment: int, weights: OperationNode = None) -> 'Matrix':
+ def moment(self, moment: int, weights: OperationNode = None) -> "Matrix":
unnamed_inputs = [self]
if weights is not None:
unnamed_inputs.append(weights)
unnamed_inputs.append(moment)
- return Matrix(self.sds_context, 'moment', unnamed_inputs,
output_type=OutputType.DOUBLE)
+ return Matrix(
+ self.sds_context, "moment", unnamed_inputs,
output_type=OutputType.DOUBLE
+ )
- def cholesky(self, safe: bool = False) -> 'Matrix':
- """ Computes the Cholesky decomposition of a symmetric, positive
definite matrix
+ def cholesky(self, safe: bool = False) -> "Matrix":
+ """Computes the Cholesky decomposition of a symmetric, positive
definite matrix
:param safe: default value is False, if flag is True additional checks
to ensure
that the matrix is symmetric positive definite are applied, if
False, checks will be skipped
:return: the OperationNode representing this operation
"""
- return Matrix(self.sds_context, 'cholesky', [self])
+ return Matrix(self.sds_context, "cholesky", [self])
- def diag(self) -> 'Matrix':
- """ Create diagonal matrix from (n x 1) matrix, or take diagonal from
square matrix
+ def diag(self) -> "Matrix":
+ """Create diagonal matrix from (n x 1) matrix, or take diagonal from
square matrix
:return: the OperationNode representing this operation
"""
- return Matrix(self.sds_context, 'diag', [self])
+ return Matrix(self.sds_context, "diag", [self])
- def svd(self) -> 'Matrix':
+ def svd(self) -> "Matrix":
"""
- Singular Value Decomposition of a matrix A (of size m x m), which
decomposes into three matrices
- U, V, and S as A = U %% S %% t(V), where U is an m x m unitary matrix
(i.e., orthogonal),
- V is an n x n unitary matrix (also orthogonal),
+ Singular Value Decomposition of a matrix A (of size m x m), which
decomposes into three matrices
+ U, V, and S as A = U %% S %% t(V), where U is an m x m unitary matrix
(i.e., orthogonal),
+ V is an n x n unitary matrix (also orthogonal),
and S is an m x n matrix with non-negative real numbers on the
diagonal.
matrices U <(m x m)>, S <(m x n)>, and V <(n x n)>
@@ -461,16 +519,17 @@ class Matrix(OperationNode):
:return: The MultiReturn node containing the three Matrices U,S, and V
"""
- U = Matrix(self.sds_context, '')
- S = Matrix(self.sds_context, '')
- V = Matrix(self.sds_context, '')
- output_nodes = [U, S, V ]
+ U = Matrix(self.sds_context, "")
+ S = Matrix(self.sds_context, "")
+ V = Matrix(self.sds_context, "")
+ output_nodes = [U, S, V]
- op = MultiReturn(self.sds_context, 'svd', output_nodes,
unnamed_input_nodes=[self])
+ op = MultiReturn(
+ self.sds_context, "svd", output_nodes, unnamed_input_nodes=[self]
+ )
return op
-
- def eigen(self) -> 'Matrix':
+ def eigen(self) -> "Matrix":
"""
Computes Eigen decomposition of input matrix A. The Eigen
decomposition consists of
two matrices V and w such that A = V %*% diag(w) %*% t(V). The columns
of V are the
@@ -483,16 +542,17 @@ class Matrix(OperationNode):
:return: The MultiReturn node containing the two Matrices w and V
"""
-
- V = Matrix(self.sds_context, '')
- w = Matrix(self.sds_context, '')
- output_nodes = [w,V]
- op = MultiReturn(self.sds_context, 'eigen', output_nodes,
unnamed_input_nodes=[self])
+
+ V = Matrix(self.sds_context, "")
+ w = Matrix(self.sds_context, "")
+ output_nodes = [w, V]
+ op = MultiReturn(
+ self.sds_context, "eigen", output_nodes, unnamed_input_nodes=[self]
+ )
return op
-
- def to_one_hot(self, num_classes: int) -> 'Matrix':
- """ OneHot encode the matrix.
+ def to_one_hot(self, num_classes: int) -> "Matrix":
+ """OneHot encode the matrix.
It is assumed that there is only one column to encode, and all values
are whole numbers > 0
@@ -503,34 +563,35 @@ class Matrix(OperationNode):
raise ValueError("Number of classes should be larger than 1")
named_input_nodes = {"X": self, "numClasses": num_classes}
- return Matrix(self.sds_context, 'toOneHot',
named_input_nodes=named_input_nodes)
+ return Matrix(self.sds_context, "toOneHot",
named_input_nodes=named_input_nodes)
- def rbind(self, other) -> 'Matrix':
+ def rbind(self, other) -> "Matrix":
"""
- Row-wise matrix concatenation, by concatenating the second matrix as
additional rows to the first matrix.
+ Row-wise matrix concatenation, by concatenating the second matrix as
additional rows to the first matrix.
:param: The other matrix to bind to the right hand side
:return: The OperationNode containing the concatenated matrices/frames.
"""
return Matrix(self.sds_context, "rbind", [self, other])
- def cbind(self, other) -> 'Matrix':
+ def cbind(self, other) -> "Matrix":
"""
- Column-wise matrix concatenation, by concatenating the second matrix
as additional columns to the first matrix.
+ Column-wise matrix concatenation, by concatenating the second matrix
as additional columns to the first matrix.
:param: The other matrix to bind to the right hand side.
:return: The OperationNode containing the concatenated matrices/frames.
"""
return Matrix(self.sds_context, "cbind", [self, other])
- def t(self) -> 'Matrix':
- """ Transposes the input
+ def t(self) -> "Matrix":
+ """Transposes the input
:return: the OperationNode representing this operation
"""
- return Matrix(self.sds_context, 't', [self])
+ return Matrix(self.sds_context, "t", [self])
- def order(self, by: int = 1, decreasing: bool = False,
- index_return: bool = False) -> 'Matrix':
- """ Sort by a column of the matrix X in increasing/decreasing order
and returns either the index or data
+ def order(
+ self, by: int = 1, decreasing: bool = False, index_return: bool = False
+ ) -> "Matrix":
+ """Sort by a column of the matrix X in increasing/decreasing order and
returns either the index or data
:param by: sort matrix by this column number
:param decreasing: If true the matrix will be sorted in decreasing
order
@@ -538,59 +599,77 @@ class Matrix(OperationNode):
:return: the OperationNode representing this operation
"""
- named_input_nodes = {'target': self, 'by': by, 'decreasing':
str(decreasing).upper(),
- 'index.return': str(index_return).upper()}
+ named_input_nodes = {
+ "target": self,
+ "by": by,
+ "decreasing": str(decreasing).upper(),
+ "index.return": str(index_return).upper(),
+ }
- return Matrix(self.sds_context, 'order', [],
named_input_nodes=named_input_nodes)
+ return Matrix(
+ self.sds_context, "order", [], named_input_nodes=named_input_nodes
+ )
- def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'Scalar':
- """ Converts the input to a string representation.
+ def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> "Scalar":
+ """Converts the input to a string representation.
:return: `Scalar` containing the string.
"""
- return Scalar(self.sds_context, 'toString', [self], kwargs,
output_type=OutputType.STRING)
+ return Scalar(
+ self.sds_context, "toString", [self], kwargs,
output_type=OutputType.STRING
+ )
- def isNA(self) -> 'Matrix':
- """ Computes a boolean indicator matrix of the same shape as the
input, indicating where NA (not available)
+ def isNA(self) -> "Matrix":
+ """Computes a boolean indicator matrix of the same shape as the input,
indicating where NA (not available)
values are located. Currently, NA is only capturing NaN values.
:return: the OperationNode representing this operation
"""
- return Matrix(self.sds_context, 'isNA', [self])
+ return Matrix(self.sds_context, "isNA", [self])
- def isNaN(self) -> 'Matrix':
- """ Computes a boolean indicator matrix of the same shape as the
input, indicating where NaN (not a number)
+ def isNaN(self) -> "Matrix":
+ """Computes a boolean indicator matrix of the same shape as the input,
indicating where NaN (not a number)
values are located.
:return: the OperationNode representing this operation
"""
- return Matrix(self.sds_context, 'isNaN', [self])
+ return Matrix(self.sds_context, "isNaN", [self])
- def isInf(self) -> 'Matrix':
- """ Computes a boolean indicator matrix of the same shape as the
input, indicating where Inf (positive or
+ def isInf(self) -> "Matrix":
+ """Computes a boolean indicator matrix of the same shape as the input,
indicating where Inf (positive or
negative infinity) values are located.
:return: the OperationNode representing this operation
"""
- return Matrix(self.sds_context, 'isInf', [self])
+ return Matrix(self.sds_context, "isInf", [self])
- def rev(self) -> 'Matrix':
- """ Reverses the rows
+ def rev(self) -> "Matrix":
+ """Reverses the rows
:return: the OperationNode representing this operation
"""
- return Matrix(self.sds_context, 'rev', [self])
+ return Matrix(self.sds_context, "rev", [self])
- def round(self) -> 'Matrix':
- """ round all values to nearest natural number
+ def round(self) -> "Matrix":
+ """round all values to nearest natural number
:return: The Matrix representing the result of this operation
"""
return Matrix(self.sds_context, "round", [self])
- def replace(self, pattern: VALID_INPUT_TYPES, replacement:
VALID_INPUT_TYPES) -> 'Matrix':
+ def replace(
+ self, pattern: VALID_INPUT_TYPES, replacement: VALID_INPUT_TYPES
+ ) -> "Matrix":
"""
Replace all values with replacement value
"""
- return Matrix(self.sds_context, "replace",
named_input_nodes={"target": self, "pattern": pattern, "replacement":
replacement})
+ return Matrix(
+ self.sds_context,
+ "replace",
+ named_input_nodes={
+ "target": self,
+ "pattern": pattern,
+ "replacement": replacement,
+ },
+ )
def __str__(self):
return "MatrixNode"
diff --git a/src/main/python/tests/matrix/test_aggregations.py b/src/main/python/tests/matrix/test_aggregations.py
index 8627d2547c..d02d5dfb3e 100644
--- a/src/main/python/tests/matrix/test_aggregations.py
+++ b/src/main/python/tests/matrix/test_aggregations.py
@@ -33,6 +33,7 @@ m2.shape = (dim, dim)
m3 = np.array(np.random.randint(10, size=dim * dim * 10) + 1, dtype=np.double)
m3.shape = (dim * 10, dim)
+
class TestMatrixAggFn(unittest.TestCase):
sds: SystemDSContext = None
@@ -45,148 +46,239 @@ class TestMatrixAggFn(unittest.TestCase):
cls.sds.close()
def test_sum1(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).sum().compute(), m1.sum()))
+ self.assertTrue(np.allclose(self.sds.from_numpy(m1).sum().compute(), m1.sum()))
def test_sum2(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).sum(axis=0).compute(), m1.sum(axis=0)))
+ self.assertTrue(
+ np.allclose(self.sds.from_numpy(m1).sum(axis=0).compute(), m1.sum(axis=0))
+ )
def test_sum3(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).sum(axis=1).compute(), m1.sum(axis=1).reshape(dim, 1)))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).sum(axis=1).compute(),
+ m1.sum(axis=1).reshape(dim, 1),
+ )
+ )
def test_mean1(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).mean().compute(), m1.mean()))
+ self.assertTrue(
+ np.allclose(self.sds.from_numpy(m1).mean().compute(), m1.mean())
+ )
def test_mean2(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).mean(axis=0).compute(), m1.mean(axis=0)))
+ self.assertTrue(
+ np.allclose(self.sds.from_numpy(m1).mean(axis=0).compute(), m1.mean(axis=0))
+ )
def test_mean3(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).mean(axis=1).compute(), m1.mean(axis=1).reshape(dim, 1)))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).mean(axis=1).compute(),
+ m1.mean(axis=1).reshape(dim, 1),
+ )
+ )
def test_full(self):
- self.assertTrue(np.allclose(
- self.sds.full((2, 3), 10.1).compute(), np.full((2, 3), 10.1)))
+ self.assertTrue(
+ np.allclose(self.sds.full((2, 3), 10.1).compute(), np.full((2, 3), 10.1))
+ )
def test_seq(self):
- self.assertTrue(np.allclose(
- self.sds.seq(3).compute(), np.arange(4).reshape(4, 1)))
+ self.assertTrue(
+ np.allclose(self.sds.seq(3).compute(), np.arange(4).reshape(4, 1))
+ )
def test_var1(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).var().compute(), m1.var(ddof=1)))
+ self.assertTrue(
+ np.allclose(self.sds.from_numpy(m1).var().compute(), m1.var(ddof=1))
+ )
def test_var2(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).var(axis=0).compute(), m1.var(axis=0, ddof=1)))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).var(axis=0).compute(), m1.var(axis=0, ddof=1)
+ )
+ )
def test_var3(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).var(axis=1).compute(), m1.var(axis=1, ddof=1).reshape(dim, 1)))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).var(axis=1).compute(),
+ m1.var(axis=1, ddof=1).reshape(dim, 1),
+ )
+ )
def test_min1(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).min().compute(), m1.min()))
+ self.assertTrue(np.allclose(self.sds.from_numpy(m1).min().compute(), m1.min()))
def test_min2(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).min(axis=0).compute(), m1.min(axis=0)))
+ self.assertTrue(
+ np.allclose(self.sds.from_numpy(m1).min(axis=0).compute(), m1.min(axis=0))
+ )
def test_min3(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).min(axis=1).compute(), m1.min(axis=1).reshape(dim, 1)))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).min(axis=1).compute(),
+ m1.min(axis=1).reshape(dim, 1),
+ )
+ )
def test_max1(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).max().compute(), m1.max()))
+ self.assertTrue(np.allclose(self.sds.from_numpy(m1).max().compute(), m1.max()))
def test_max2(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).max(axis=0).compute(), m1.max(axis=0)))
+ self.assertTrue(
+ np.allclose(self.sds.from_numpy(m1).max(axis=0).compute(), m1.max(axis=0))
+ )
def test_max3(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).max(axis=1).compute(), m1.max(axis=1).reshape(dim, 1)))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).max(axis=1).compute(),
+ m1.max(axis=1).reshape(dim, 1),
+ )
+ )
def test_trace1(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).trace().compute(), m1.trace()))
+ self.assertTrue(
+ np.allclose(self.sds.from_numpy(m1).trace().compute(), m1.trace())
+ )
def test_trace2(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m2).trace().compute(), m2.trace()))
+ self.assertTrue(
+ np.allclose(self.sds.from_numpy(m2).trace().compute(), m2.trace())
+ )
def test_countDistinctApprox1(self):
distinct = 100
- m = np.round(np.random.random((1000, 1000))*(distinct - 1))
+ m = np.round(np.random.random((1000, 1000)) * (distinct - 1))
# allow and error of 1%
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m).countDistinctApprox().compute(), len(np.unique(m)), 1))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m).countDistinctApprox().compute(),
+ len(np.unique(m)),
+ 1,
+ )
+ )
def test_countDistinctApprox2(self):
distinct = 1000
- m = np.round(np.random.random((10000, 100))*(distinct - 1))
+ m = np.round(np.random.random((10000, 100)) * (distinct - 1))
# allow and error of 1%
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m).countDistinctApprox(0).compute(), [len(np.unique(col))*100 for col in m.T], 10))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m).countDistinctApprox(0).compute(),
+ [len(np.unique(col)) * 100 for col in m.T],
+ 10,
+ )
+ )
def test_countDistinctApprox3(self):
distinct = 1000
- m = np.round(np.random.random((100, 10000))*(distinct - 1))
+ m = np.round(np.random.random((100, 10000)) * (distinct - 1))
# allow and error of 1%
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m).countDistinctApprox(1).compute(), np.array([[len(np.unique(col))] for col in m]), 10))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m).countDistinctApprox(1).compute(),
+ np.array([[len(np.unique(col))] for col in m]),
+ 10,
+ )
+ )
def test_countDistinctApprox4(self):
m = np.round(np.random.random((2, 2)))
with self.assertRaises(ValueError):
self.sds.from_numpy(m).countDistinctApprox(2)
-
def test_countDistinct1(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).countDistinct().compute(), len(np.unique(m1))))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).countDistinct().compute(), len(np.unique(m1))
+ )
+ )
def test_countDistinct2(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m2).countDistinct().compute(), len(np.unique(m2))))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m2).countDistinct().compute(), len(np.unique(m2))
+ )
+ )
def test_countDistinct3(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m3).countDistinct().compute(), len(np.unique(m3))))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m3).countDistinct().compute(), len(np.unique(m3))
+ )
+ )
def test_countDistinct4(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).countDistinct(0).compute(), [len(np.unique(col)) for col in m1.T]))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).countDistinct(0).compute(),
+ [len(np.unique(col)) for col in m1.T],
+ )
+ )
def test_countDistinct5(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m2).countDistinct(0).compute(), [len(np.unique(col)) for col in m2.T]))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m2).countDistinct(0).compute(),
+ [len(np.unique(col)) for col in m2.T],
+ )
+ )
def test_countDistinct6(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m3).countDistinct(0).compute(), [len(np.unique(col)) for col in m3.T]))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m3).countDistinct(0).compute(),
+ [len(np.unique(col)) for col in m3.T],
+ )
+ )
def test_countDistinct7(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m1).countDistinct(1).compute(), np.array([[len(np.unique(col))] for col in m1])))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).countDistinct(1).compute(),
+ np.array([[len(np.unique(col))] for col in m1]),
+ )
+ )
def test_countDistinct8(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m2).countDistinct(1).compute(), np.array([[len(np.unique(col))] for col in m2])))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m2).countDistinct(1).compute(),
+ np.array([[len(np.unique(col))] for col in m2]),
+ )
+ )
def test_countDistinct9(self):
- self.assertTrue(np.allclose(
- self.sds.from_numpy(m3).countDistinct(1).compute(), np.array([[len(np.unique(col))] for col in m3])))
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m3).countDistinct(1).compute(),
+ np.array([[len(np.unique(col))] for col in m3]),
+ )
+ )
def test_countDistinct10(self):
with self.assertRaises(ValueError):
self.sds.from_numpy(m3).countDistinct(2)
+ def test_sd1(self):
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m1).sd().compute(), np.std(m1, ddof=1), 1e-9
+ )
+ )
+
+ def test_sd2(self):
+ self.assertTrue(
+ np.allclose(
+ self.sds.from_numpy(m2).sd().compute(), np.std(m2, ddof=1), 1e-9
+ )
+ )
+
if __name__ == "__main__":
unittest.main(exit=False)