This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new 06ccfd629d [SYSTEMDS-3749] Python API missing builtin sd 06ccfd629d is described below commit 06ccfd629ddaf084ae35aa095c3120dbceef68a3 Author: e-strauss <lathan...@gmx.de> AuthorDate: Tue Sep 3 22:20:45 2024 +0200 [SYSTEMDS-3749] Python API missing builtin sd Also included in this commit is a reformatting of the Matrix Operation node. Closes #2092 --- src/main/python/systemds/operator/nodes/matrix.py | 515 +++++++++++++--------- src/main/python/tests/matrix/test_aggregations.py | 224 +++++++--- 2 files changed, 455 insertions(+), 284 deletions(-) diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py index 3f02daa343..fafb815ca4 100644 --- a/src/main/python/systemds/operator/nodes/matrix.py +++ b/src/main/python/systemds/operator/nodes/matrix.py @@ -29,21 +29,31 @@ from systemds.operator.operation_node import OperationNode from systemds.operator.nodes.multi_return import MultiReturn from systemds.operator.nodes.scalar import Scalar from systemds.script_building.dag import OutputType -from systemds.utils.consts import (BINARY_OPERATIONS, VALID_ARITHMETIC_TYPES, - VALID_INPUT_TYPES) -from systemds.utils.converters import (matrix_block_to_numpy, - numpy_to_matrix_block) -from systemds.utils.helpers import check_is_empty_slice, check_no_less_than_zero, get_slice_string +from systemds.utils.consts import ( + BINARY_OPERATIONS, + VALID_ARITHMETIC_TYPES, + VALID_INPUT_TYPES, +) +from systemds.utils.converters import matrix_block_to_numpy, numpy_to_matrix_block +from systemds.utils.helpers import ( + check_is_empty_slice, + check_no_less_than_zero, + get_slice_string, +) class Matrix(OperationNode): _np_array: np.array - def __init__(self, sds_context, operation: str, - unnamed_input_nodes: Union[str, - Iterable[VALID_INPUT_TYPES]] = None, - named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None, - local_data: np.array = None, brackets: bool = 
False) -> 'Matrix': + def __init__( + self, + sds_context, + operation: str, + unnamed_input_nodes: Union[str, Iterable[VALID_INPUT_TYPES]] = None, + named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None, + local_data: np.array = None, + brackets: bool = False, + ) -> "Matrix": is_python_local_data = False if local_data is not None: @@ -52,17 +62,33 @@ class Matrix(OperationNode): else: self._np_array = None - super().__init__(sds_context, operation, unnamed_input_nodes, - named_input_nodes, OutputType.MATRIX, is_python_local_data, brackets) - - def pass_python_data_to_prepared_script(self, sds, var_name: str, prepared_script: JavaObject) -> None: - assert self.is_python_local_data, 'Can only pass data to prepared script if it is python local!' + super().__init__( + sds_context, + operation, + unnamed_input_nodes, + named_input_nodes, + OutputType.MATRIX, + is_python_local_data, + brackets, + ) + + def pass_python_data_to_prepared_script( + self, sds, var_name: str, prepared_script: JavaObject + ) -> None: + assert ( + self.is_python_local_data + ), "Can only pass data to prepared script if it is python local!" 
if self._is_numpy(): - prepared_script.setMatrix(var_name, numpy_to_matrix_block( - sds, self._np_array), True) # True for reuse - - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], - named_input_vars: Dict[str, str]) -> str: + prepared_script.setMatrix( + var_name, numpy_to_matrix_block(sds, self._np_array), True + ) # True for reuse + + def code_line( + self, + var_name: str, + unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str], + ) -> str: code_line = super().code_line(var_name, unnamed_input_vars, named_input_vars) if self._is_numpy(): code_line = code_line.format(file_name=var_name) @@ -70,94 +96,96 @@ class Matrix(OperationNode): def compute(self, verbose: bool = False, lineage: bool = False) -> np.array: if self._is_numpy(): - self.sds_context._log.info('Numpy Array - No Compilation necessary') + self.sds_context._log.info("Numpy Array - No Compilation necessary") return self._np_array else: return super().compute(verbose, lineage) def _parse_output_result_variables(self, result_variables): - return matrix_block_to_numpy(self.sds_context.java_gateway.jvm, - result_variables.getMatrixBlock(self._script.out_var_name[0])) + return matrix_block_to_numpy( + self.sds_context.java_gateway.jvm, + result_variables.getMatrixBlock(self._script.out_var_name[0]), + ) def _is_numpy(self) -> bool: return self._np_array is not None - def __add__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '+', [self, other]) + def __add__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "+", [self, other]) # Left hand side - def __radd__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '+', [other, self]) + def __radd__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "+", [other, self]) - def __sub__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '-', [self, other]) + def 
__sub__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "-", [self, other]) # Left hand side - def __rsub__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '-', [other, self]) + def __rsub__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "-", [other, self]) - def __mul__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '*', [self, other]) + def __mul__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "*", [self, other]) - def __rmul__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '*', [other, self]) + def __rmul__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "*", [other, self]) - def __truediv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '/', [self, other]) + def __truediv__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "/", [self, other]) - def __rtruediv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '/', [other, self]) + def __rtruediv__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "/", [other, self]) - def __floordiv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '//', [self, other]) + def __floordiv__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "//", [self, other]) - def __rfloordiv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Matrix': - return Matrix(self.sds_context, '//', [other, self]) + def __rfloordiv__(self, other: VALID_ARITHMETIC_TYPES) -> "Matrix": + return Matrix(self.sds_context, "//", [other, self]) - def __lt__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '<', [self, other]) + def __lt__(self, other) -> "Matrix": + return Matrix(self.sds_context, "<", 
[self, other]) - def __rlt__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '<', [other, self]) + def __rlt__(self, other) -> "Matrix": + return Matrix(self.sds_context, "<", [other, self]) - def __le__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '<=', [self, other]) + def __le__(self, other) -> "Matrix": + return Matrix(self.sds_context, "<=", [self, other]) - def __rle__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '<=', [other, self]) + def __rle__(self, other) -> "Matrix": + return Matrix(self.sds_context, "<=", [other, self]) - def __gt__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '>', [self, other]) + def __gt__(self, other) -> "Matrix": + return Matrix(self.sds_context, ">", [self, other]) - def __rgt__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '>', [other, self]) + def __rgt__(self, other) -> "Matrix": + return Matrix(self.sds_context, ">", [other, self]) - def __ge__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '>=', [self, other]) + def __ge__(self, other) -> "Matrix": + return Matrix(self.sds_context, ">=", [self, other]) - def __rge__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '>=', [other, self]) + def __rge__(self, other) -> "Matrix": + return Matrix(self.sds_context, ">=", [other, self]) - def __eq__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '==', [self, other]) + def __eq__(self, other) -> "Matrix": + return Matrix(self.sds_context, "==", [self, other]) - def __req__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '==', [other, self]) + def __req__(self, other) -> "Matrix": + return Matrix(self.sds_context, "==", [other, self]) - def __ne__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '!=', [self, other]) + def __ne__(self, other) -> "Matrix": + return Matrix(self.sds_context, "!=", [self, other]) - def __rne__(self, other) -> 'Matrix': - return Matrix(self.sds_context, '!=', [other, self]) + def 
__rne__(self, other) -> "Matrix": + return Matrix(self.sds_context, "!=", [other, self]) - def __matmul__(self, other: 'Matrix') -> 'Matrix': - return Matrix(self.sds_context, '%*%', [self, other]) + def __matmul__(self, other: "Matrix") -> "Matrix": + return Matrix(self.sds_context, "%*%", [self, other]) - def nRow(self) -> 'Scalar': - return Scalar(self.sds_context, 'nrow', [self]) + def nRow(self) -> "Scalar": + return Scalar(self.sds_context, "nrow", [self]) - def nCol(self) -> 'Scalar': - return Scalar(self.sds_context, 'ncol', [self]) + def nCol(self) -> "Scalar": + return Scalar(self.sds_context, "ncol", [self]) def __getitem__(self, i): if isinstance(i, tuple) and len(i) > 2: @@ -165,138 +193,154 @@ class Matrix(OperationNode): elif isinstance(i, list): check_no_less_than_zero(i) slice = self.sds_context.from_numpy(np.array(i)) + 1 - select = Matrix(self.sds_context, "table", - [slice, 1, self.nRow(), 1]) - ret = Matrix(self.sds_context, "removeEmpty", [], { - 'target': self, 'margin': '"rows"', 'select': select}) + select = Matrix(self.sds_context, "table", [slice, 1, self.nRow(), 1]) + ret = Matrix( + self.sds_context, + "removeEmpty", + [], + {"target": self, "margin": '"rows"', "select": select}, + ) return ret elif isinstance(i, tuple) and isinstance(i[0], list) and isinstance(i[1], list): raise NotImplementedError("double slicing is not supported yet") - elif isinstance(i, tuple) and check_is_empty_slice(i[0]) and isinstance(i[1], list): + elif ( + isinstance(i, tuple) + and check_is_empty_slice(i[0]) + and isinstance(i[1], list) + ): check_no_less_than_zero(i[1]) slice = self.sds_context.from_numpy(np.array(i[1])) + 1 - select = Matrix(self.sds_context, "table", - [slice, 1, self.nCol(), 1]) - ret = Matrix(self.sds_context, "removeEmpty", [], { - 'target': self, 'margin': '"cols"', 'select': select}) + select = Matrix(self.sds_context, "table", [slice, 1, self.nCol(), 1]) + ret = Matrix( + self.sds_context, + "removeEmpty", + [], + {"target": self, 
"margin": '"cols"', "select": select}, + ) return ret else: sliceIns = get_slice_string(i) - return Matrix(self.sds_context, '', [self, sliceIns], brackets=True) + return Matrix(self.sds_context, "", [self, sliceIns], brackets=True) - def sum(self, axis: int = None) -> 'OperationNode': + def sum(self, axis: int = None) -> "OperationNode": """Calculate sum of matrix. :param axis: can be 0 or 1 to do either row or column sums :return: `Matrix` representing operation """ if axis == 0: - return Matrix(self.sds_context, 'colSums', [self]) + return Matrix(self.sds_context, "colSums", [self]) elif axis == 1: - return Matrix(self.sds_context, 'rowSums', [self]) + return Matrix(self.sds_context, "rowSums", [self]) elif axis is None: - return Scalar(self.sds_context, 'sum', [self]) + return Scalar(self.sds_context, "sum", [self]) raise ValueError( - f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) - def mean(self, axis: int = None) -> 'OperationNode': + def mean(self, axis: int = None) -> "OperationNode": """Calculate mean of matrix. :param axis: can be 0 or 1 to do either row or column means :return: `Matrix` representing operation """ if axis == 0: - return Matrix(self.sds_context, 'colMeans', [self]) + return Matrix(self.sds_context, "colMeans", [self]) elif axis == 1: - return Matrix(self.sds_context, 'rowMeans', [self]) + return Matrix(self.sds_context, "rowMeans", [self]) elif axis is None: - return Scalar(self.sds_context, 'mean', [self]) + return Scalar(self.sds_context, "mean", [self]) raise ValueError( - f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) - def max(self, axis: int = None) -> 'OperationNode': + def max(self, axis: int = None) -> "OperationNode": """Calculate max of matrix. 
:param axis: can be 0 or 1 to do either row or column aggregation :return: `Matrix` representing operation """ if axis == 0: - return Matrix(self.sds_context, 'colMaxs', [self]) + return Matrix(self.sds_context, "colMaxs", [self]) elif axis == 1: - return Matrix(self.sds_context, 'rowMaxs', [self]) + return Matrix(self.sds_context, "rowMaxs", [self]) elif axis is None: - return Scalar(self.sds_context, 'max', [self]) + return Scalar(self.sds_context, "max", [self]) raise ValueError( - f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) - def min(self, axis: int = None) -> 'OperationNode': + def min(self, axis: int = None) -> "OperationNode": """Calculate max of matrix. :param axis: can be 0 or 1 to do either row or column aggregation :return: `Matrix` representing operation """ if axis == 0: - return Matrix(self.sds_context, 'colMins', [self]) + return Matrix(self.sds_context, "colMins", [self]) elif axis == 1: - return Matrix(self.sds_context, 'rowMins', [self]) + return Matrix(self.sds_context, "rowMins", [self]) elif axis is None: - return Scalar(self.sds_context, 'min', [self]) + return Scalar(self.sds_context, "min", [self]) raise ValueError( - f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) - def countDistinct(self, axis: int = None) -> 'OperationNode': + def countDistinct(self, axis: int = None) -> "OperationNode": """Calculate the number of distinct values of matrix. 
:param axis: can be 0 or 1 to do either row or column aggregation :return: `Matrix` representing operation """ if axis == 0: - return Matrix(self.sds_context, 'colCountDistinct', [self]) + return Matrix(self.sds_context, "colCountDistinct", [self]) elif axis == 1: - return Matrix(self.sds_context, 'rowCountDistinct', [self]) + return Matrix(self.sds_context, "rowCountDistinct", [self]) elif axis is None: - return Scalar(self.sds_context, 'countDistinct', [self]) + return Scalar(self.sds_context, "countDistinct", [self]) raise ValueError( - f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) - - def countDistinctApprox(self, axis: int = None) -> 'OperationNode': + def countDistinctApprox(self, axis: int = None) -> "OperationNode": """Calculate the approximate number of distinct values of matrix. :param axis: can be 0 or 1 to do either row or column aggregation :return: `Matrix` representing operation """ if axis == 0: - return Matrix(self.sds_context, 'colCountDistinctApprox', [self]) + return Matrix(self.sds_context, "colCountDistinctApprox", [self]) elif axis == 1: - return Matrix(self.sds_context, 'rowCountDistinctApprox', [self]) + return Matrix(self.sds_context, "rowCountDistinctApprox", [self]) elif axis is None: - return Scalar(self.sds_context, 'countDistinctApprox', [self]) + return Scalar(self.sds_context, "countDistinctApprox", [self]) raise ValueError( - f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) - def var(self, axis: int = None) -> 'OperationNode': + def var(self, axis: int = None) -> "OperationNode": """Calculate variance of matrix. 
:param axis: can be 0 or 1 to do either row or column vars :return: `Matrix` representing operation """ if axis == 0: - return Matrix(self.sds_context, 'colVars', [self]) + return Matrix(self.sds_context, "colVars", [self]) elif axis == 1: - return Matrix(self.sds_context, 'rowVars', [self]) + return Matrix(self.sds_context, "rowVars", [self]) elif axis is None: - return Scalar(self.sds_context, 'var', [self]) + return Scalar(self.sds_context, "var", [self]) raise ValueError( - f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) - def trace(self) -> 'Scalar': + def trace(self) -> "Scalar": """Calculate trace. :return: `Matrix` representing operation """ - return Scalar(self.sds_context, 'trace', [self]) + return Scalar(self.sds_context, "trace", [self]) - def unique(self, axis: int = None) -> 'Matrix': + def unique(self, axis: int = None) -> "Matrix": """Returns the unique values for the complete matrix, for each row or for each column. 
:param axis: can be 0 or 1 to do either row or column uniques @@ -304,156 +348,170 @@ class Matrix(OperationNode): """ if axis == 0: named_input_nodes = {"dir": '"c"'} - return Matrix(self.sds_context, 'unique', [self], named_input_nodes=named_input_nodes) + return Matrix( + self.sds_context, "unique", [self], named_input_nodes=named_input_nodes + ) elif axis == 1: named_input_nodes = {"dir": '"r"'} - return Matrix(self.sds_context, 'unique', [self], named_input_nodes=named_input_nodes) + return Matrix( + self.sds_context, "unique", [self], named_input_nodes=named_input_nodes + ) elif axis is None: - return Matrix(self.sds_context, 'unique', [self]) + return Matrix(self.sds_context, "unique", [self]) raise ValueError( - f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}") + f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}" + ) + + def sd(self) -> "Scalar": + """Calculate standard deviation of matrix. - def abs(self) -> 'Matrix': + :return: `Matrix` representing operation + """ + return Scalar(self.sds_context, "sd", [self]) + + def abs(self) -> "Matrix": """Calculate absolute. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'abs', [self]) + return Matrix(self.sds_context, "abs", [self]) - def sqrt(self) -> 'Matrix': + def sqrt(self) -> "Matrix": """Calculate square root. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'sqrt', [self]) + return Matrix(self.sds_context, "sqrt", [self]) - def exp(self) -> 'Matrix': + def exp(self) -> "Matrix": """Calculate exponential. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'exp', [self]) + return Matrix(self.sds_context, "exp", [self]) - def floor(self) -> 'Matrix': + def floor(self) -> "Matrix": """Return the floor of the input, element-wise. 
:return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'floor', [self]) + return Matrix(self.sds_context, "floor", [self]) - def ceil(self) -> 'Matrix': + def ceil(self) -> "Matrix": """Return the ceiling of the input, element-wise. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'ceil', [self]) + return Matrix(self.sds_context, "ceil", [self]) - def log(self) -> 'Matrix': + def log(self) -> "Matrix": """Calculate logarithm. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'log', [self]) + return Matrix(self.sds_context, "log", [self]) - def sign(self) -> 'Matrix': + def sign(self) -> "Matrix": """Returns a matrix representing the signs of the input matrix elements, where 1 represents positive, 0 represents zero, and -1 represents negative. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'sign', [self]) + return Matrix(self.sds_context, "sign", [self]) - def sin(self) -> 'Matrix': + def sin(self) -> "Matrix": """Calculate sin. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'sin', [self]) + return Matrix(self.sds_context, "sin", [self]) - def cos(self) -> 'Matrix': + def cos(self) -> "Matrix": """Calculate cos. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'cos', [self]) + return Matrix(self.sds_context, "cos", [self]) - def tan(self) -> 'Matrix': + def tan(self) -> "Matrix": """Calculate tan. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'tan', [self]) + return Matrix(self.sds_context, "tan", [self]) - def asin(self) -> 'Matrix': + def asin(self) -> "Matrix": """Calculate arcsin. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'asin', [self]) + return Matrix(self.sds_context, "asin", [self]) - def acos(self) -> 'Matrix': + def acos(self) -> "Matrix": """Calculate arccos. 
:return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'acos', [self]) + return Matrix(self.sds_context, "acos", [self]) - def atan(self) -> 'Matrix': + def atan(self) -> "Matrix": """Calculate arctan. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'atan', [self]) + return Matrix(self.sds_context, "atan", [self]) - def sinh(self) -> 'Matrix': + def sinh(self) -> "Matrix": """Calculate sin. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'sinh', [self]) + return Matrix(self.sds_context, "sinh", [self]) - def cosh(self) -> 'Matrix': + def cosh(self) -> "Matrix": """Calculate cos. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'cosh', [self]) + return Matrix(self.sds_context, "cosh", [self]) - def tanh(self) -> 'Matrix': + def tanh(self) -> "Matrix": """Calculate tan. :return: `Matrix` representing operation """ - return Matrix(self.sds_context, 'tanh', [self]) + return Matrix(self.sds_context, "tanh", [self]) - def moment(self, moment: int, weights: OperationNode = None) -> 'Matrix': + def moment(self, moment: int, weights: OperationNode = None) -> "Matrix": unnamed_inputs = [self] if weights is not None: unnamed_inputs.append(weights) unnamed_inputs.append(moment) - return Matrix(self.sds_context, 'moment', unnamed_inputs, output_type=OutputType.DOUBLE) + return Matrix( + self.sds_context, "moment", unnamed_inputs, output_type=OutputType.DOUBLE + ) - def cholesky(self, safe: bool = False) -> 'Matrix': - """ Computes the Cholesky decomposition of a symmetric, positive definite matrix + def cholesky(self, safe: bool = False) -> "Matrix": + """Computes the Cholesky decomposition of a symmetric, positive definite matrix :param safe: default value is False, if flag is True additional checks to ensure that the matrix is symmetric positive definite are applied, if False, checks will be skipped :return: the OperationNode representing this operation """ - 
return Matrix(self.sds_context, 'cholesky', [self]) + return Matrix(self.sds_context, "cholesky", [self]) - def diag(self) -> 'Matrix': - """ Create diagonal matrix from (n x 1) matrix, or take diagonal from square matrix + def diag(self) -> "Matrix": + """Create diagonal matrix from (n x 1) matrix, or take diagonal from square matrix :return: the OperationNode representing this operation """ - return Matrix(self.sds_context, 'diag', [self]) + return Matrix(self.sds_context, "diag", [self]) - def svd(self) -> 'Matrix': + def svd(self) -> "Matrix": """ - Singular Value Decomposition of a matrix A (of size m x m), which decomposes into three matrices - U, V, and S as A = U %% S %% t(V), where U is an m x m unitary matrix (i.e., orthogonal), - V is an n x n unitary matrix (also orthogonal), + Singular Value Decomposition of a matrix A (of size m x m), which decomposes into three matrices + U, V, and S as A = U %% S %% t(V), where U is an m x m unitary matrix (i.e., orthogonal), + V is an n x n unitary matrix (also orthogonal), and S is an m x n matrix with non-negative real numbers on the diagonal. matrices U <(m x m)>, S <(m x n)>, and V <(n x n)> @@ -461,16 +519,17 @@ class Matrix(OperationNode): :return: The MultiReturn node containing the three Matrices U,S, and V """ - U = Matrix(self.sds_context, '') - S = Matrix(self.sds_context, '') - V = Matrix(self.sds_context, '') - output_nodes = [U, S, V ] + U = Matrix(self.sds_context, "") + S = Matrix(self.sds_context, "") + V = Matrix(self.sds_context, "") + output_nodes = [U, S, V] - op = MultiReturn(self.sds_context, 'svd', output_nodes, unnamed_input_nodes=[self]) + op = MultiReturn( + self.sds_context, "svd", output_nodes, unnamed_input_nodes=[self] + ) return op - - def eigen(self) -> 'Matrix': + def eigen(self) -> "Matrix": """ Computes Eigen decomposition of input matrix A. The Eigen decomposition consists of two matrices V and w such that A = V %*% diag(w) %*% t(V). 
The columns of V are the @@ -483,16 +542,17 @@ class Matrix(OperationNode): :return: The MultiReturn node containing the two Matrices w and V """ - - V = Matrix(self.sds_context, '') - w = Matrix(self.sds_context, '') - output_nodes = [w,V] - op = MultiReturn(self.sds_context, 'eigen', output_nodes, unnamed_input_nodes=[self]) + + V = Matrix(self.sds_context, "") + w = Matrix(self.sds_context, "") + output_nodes = [w, V] + op = MultiReturn( + self.sds_context, "eigen", output_nodes, unnamed_input_nodes=[self] + ) return op - - def to_one_hot(self, num_classes: int) -> 'Matrix': - """ OneHot encode the matrix. + def to_one_hot(self, num_classes: int) -> "Matrix": + """OneHot encode the matrix. It is assumed that there is only one column to encode, and all values are whole numbers > 0 @@ -503,34 +563,35 @@ class Matrix(OperationNode): raise ValueError("Number of classes should be larger than 1") named_input_nodes = {"X": self, "numClasses": num_classes} - return Matrix(self.sds_context, 'toOneHot', named_input_nodes=named_input_nodes) + return Matrix(self.sds_context, "toOneHot", named_input_nodes=named_input_nodes) - def rbind(self, other) -> 'Matrix': + def rbind(self, other) -> "Matrix": """ - Row-wise matrix concatenation, by concatenating the second matrix as additional rows to the first matrix. + Row-wise matrix concatenation, by concatenating the second matrix as additional rows to the first matrix. :param: The other matrix to bind to the right hand side :return: The OperationNode containing the concatenated matrices/frames. """ return Matrix(self.sds_context, "rbind", [self, other]) - def cbind(self, other) -> 'Matrix': + def cbind(self, other) -> "Matrix": """ - Column-wise matrix concatenation, by concatenating the second matrix as additional columns to the first matrix. + Column-wise matrix concatenation, by concatenating the second matrix as additional columns to the first matrix. :param: The other matrix to bind to the right hand side. 
:return: The OperationNode containing the concatenated matrices/frames. """ return Matrix(self.sds_context, "cbind", [self, other]) - def t(self) -> 'Matrix': - """ Transposes the input + def t(self) -> "Matrix": + """Transposes the input :return: the OperationNode representing this operation """ - return Matrix(self.sds_context, 't', [self]) + return Matrix(self.sds_context, "t", [self]) - def order(self, by: int = 1, decreasing: bool = False, - index_return: bool = False) -> 'Matrix': - """ Sort by a column of the matrix X in increasing/decreasing order and returns either the index or data + def order( + self, by: int = 1, decreasing: bool = False, index_return: bool = False + ) -> "Matrix": + """Sort by a column of the matrix X in increasing/decreasing order and returns either the index or data :param by: sort matrix by this column number :param decreasing: If true the matrix will be sorted in decreasing order @@ -538,59 +599,77 @@ class Matrix(OperationNode): :return: the OperationNode representing this operation """ - named_input_nodes = {'target': self, 'by': by, 'decreasing': str(decreasing).upper(), - 'index.return': str(index_return).upper()} + named_input_nodes = { + "target": self, + "by": by, + "decreasing": str(decreasing).upper(), + "index.return": str(index_return).upper(), + } - return Matrix(self.sds_context, 'order', [], named_input_nodes=named_input_nodes) + return Matrix( + self.sds_context, "order", [], named_input_nodes=named_input_nodes + ) - def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'Scalar': - """ Converts the input to a string representation. + def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> "Scalar": + """Converts the input to a string representation. :return: `Scalar` containing the string. 
""" - return Scalar(self.sds_context, 'toString', [self], kwargs, output_type=OutputType.STRING) + return Scalar( + self.sds_context, "toString", [self], kwargs, output_type=OutputType.STRING + ) - def isNA(self) -> 'Matrix': - """ Computes a boolean indicator matrix of the same shape as the input, indicating where NA (not available) + def isNA(self) -> "Matrix": + """Computes a boolean indicator matrix of the same shape as the input, indicating where NA (not available) values are located. Currently, NA is only capturing NaN values. :return: the OperationNode representing this operation """ - return Matrix(self.sds_context, 'isNA', [self]) + return Matrix(self.sds_context, "isNA", [self]) - def isNaN(self) -> 'Matrix': - """ Computes a boolean indicator matrix of the same shape as the input, indicating where NaN (not a number) + def isNaN(self) -> "Matrix": + """Computes a boolean indicator matrix of the same shape as the input, indicating where NaN (not a number) values are located. :return: the OperationNode representing this operation """ - return Matrix(self.sds_context, 'isNaN', [self]) + return Matrix(self.sds_context, "isNaN", [self]) - def isInf(self) -> 'Matrix': - """ Computes a boolean indicator matrix of the same shape as the input, indicating where Inf (positive or + def isInf(self) -> "Matrix": + """Computes a boolean indicator matrix of the same shape as the input, indicating where Inf (positive or negative infinity) values are located. 
:return: the OperationNode representing this operation """ - return Matrix(self.sds_context, 'isInf', [self]) + return Matrix(self.sds_context, "isInf", [self]) - def rev(self) -> 'Matrix': - """ Reverses the rows + def rev(self) -> "Matrix": + """Reverses the rows :return: the OperationNode representing this operation """ - return Matrix(self.sds_context, 'rev', [self]) + return Matrix(self.sds_context, "rev", [self]) - def round(self) -> 'Matrix': - """ round all values to nearest natural number + def round(self) -> "Matrix": + """round all values to nearest natural number :return: The Matrix representing the result of this operation """ return Matrix(self.sds_context, "round", [self]) - def replace(self, pattern: VALID_INPUT_TYPES, replacement: VALID_INPUT_TYPES) -> 'Matrix': + def replace( + self, pattern: VALID_INPUT_TYPES, replacement: VALID_INPUT_TYPES + ) -> "Matrix": """ Replace all values with replacement value """ - return Matrix(self.sds_context, "replace", named_input_nodes={"target": self, "pattern": pattern, "replacement": replacement}) + return Matrix( + self.sds_context, + "replace", + named_input_nodes={ + "target": self, + "pattern": pattern, + "replacement": replacement, + }, + ) def __str__(self): return "MatrixNode" diff --git a/src/main/python/tests/matrix/test_aggregations.py b/src/main/python/tests/matrix/test_aggregations.py index 8627d2547c..d02d5dfb3e 100644 --- a/src/main/python/tests/matrix/test_aggregations.py +++ b/src/main/python/tests/matrix/test_aggregations.py @@ -33,6 +33,7 @@ m2.shape = (dim, dim) m3 = np.array(np.random.randint(10, size=dim * dim * 10) + 1, dtype=np.double) m3.shape = (dim * 10, dim) + class TestMatrixAggFn(unittest.TestCase): sds: SystemDSContext = None @@ -45,148 +46,239 @@ class TestMatrixAggFn(unittest.TestCase): cls.sds.close() def test_sum1(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).sum().compute(), m1.sum())) + self.assertTrue(np.allclose(self.sds.from_numpy(m1).sum().compute(), 
m1.sum())) def test_sum2(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).sum(axis=0).compute(), m1.sum(axis=0))) + self.assertTrue( + np.allclose(self.sds.from_numpy(m1).sum(axis=0).compute(), m1.sum(axis=0)) + ) def test_sum3(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).sum(axis=1).compute(), m1.sum(axis=1).reshape(dim, 1))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).sum(axis=1).compute(), + m1.sum(axis=1).reshape(dim, 1), + ) + ) def test_mean1(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).mean().compute(), m1.mean())) + self.assertTrue( + np.allclose(self.sds.from_numpy(m1).mean().compute(), m1.mean()) + ) def test_mean2(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).mean(axis=0).compute(), m1.mean(axis=0))) + self.assertTrue( + np.allclose(self.sds.from_numpy(m1).mean(axis=0).compute(), m1.mean(axis=0)) + ) def test_mean3(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).mean(axis=1).compute(), m1.mean(axis=1).reshape(dim, 1))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).mean(axis=1).compute(), + m1.mean(axis=1).reshape(dim, 1), + ) + ) def test_full(self): - self.assertTrue(np.allclose( - self.sds.full((2, 3), 10.1).compute(), np.full((2, 3), 10.1))) + self.assertTrue( + np.allclose(self.sds.full((2, 3), 10.1).compute(), np.full((2, 3), 10.1)) + ) def test_seq(self): - self.assertTrue(np.allclose( - self.sds.seq(3).compute(), np.arange(4).reshape(4, 1))) + self.assertTrue( + np.allclose(self.sds.seq(3).compute(), np.arange(4).reshape(4, 1)) + ) def test_var1(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).var().compute(), m1.var(ddof=1))) + self.assertTrue( + np.allclose(self.sds.from_numpy(m1).var().compute(), m1.var(ddof=1)) + ) def test_var2(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).var(axis=0).compute(), m1.var(axis=0, ddof=1))) + self.assertTrue( + np.allclose( + 
self.sds.from_numpy(m1).var(axis=0).compute(), m1.var(axis=0, ddof=1) + ) + ) def test_var3(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).var(axis=1).compute(), m1.var(axis=1, ddof=1).reshape(dim, 1))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).var(axis=1).compute(), + m1.var(axis=1, ddof=1).reshape(dim, 1), + ) + ) def test_min1(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).min().compute(), m1.min())) + self.assertTrue(np.allclose(self.sds.from_numpy(m1).min().compute(), m1.min())) def test_min2(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).min(axis=0).compute(), m1.min(axis=0))) + self.assertTrue( + np.allclose(self.sds.from_numpy(m1).min(axis=0).compute(), m1.min(axis=0)) + ) def test_min3(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).min(axis=1).compute(), m1.min(axis=1).reshape(dim, 1))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).min(axis=1).compute(), + m1.min(axis=1).reshape(dim, 1), + ) + ) def test_max1(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).max().compute(), m1.max())) + self.assertTrue(np.allclose(self.sds.from_numpy(m1).max().compute(), m1.max())) def test_max2(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).max(axis=0).compute(), m1.max(axis=0))) + self.assertTrue( + np.allclose(self.sds.from_numpy(m1).max(axis=0).compute(), m1.max(axis=0)) + ) def test_max3(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).max(axis=1).compute(), m1.max(axis=1).reshape(dim, 1))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).max(axis=1).compute(), + m1.max(axis=1).reshape(dim, 1), + ) + ) def test_trace1(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).trace().compute(), m1.trace())) + self.assertTrue( + np.allclose(self.sds.from_numpy(m1).trace().compute(), m1.trace()) + ) def test_trace2(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m2).trace().compute(), m2.trace())) 
+ self.assertTrue( + np.allclose(self.sds.from_numpy(m2).trace().compute(), m2.trace()) + ) def test_countDistinctApprox1(self): distinct = 100 - m = np.round(np.random.random((1000, 1000))*(distinct - 1)) + m = np.round(np.random.random((1000, 1000)) * (distinct - 1)) # allow and error of 1% - self.assertTrue(np.allclose( - self.sds.from_numpy(m).countDistinctApprox().compute(), len(np.unique(m)), 1)) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m).countDistinctApprox().compute(), + len(np.unique(m)), + 1, + ) + ) def test_countDistinctApprox2(self): distinct = 1000 - m = np.round(np.random.random((10000, 100))*(distinct - 1)) + m = np.round(np.random.random((10000, 100)) * (distinct - 1)) # allow and error of 1% - self.assertTrue(np.allclose( - self.sds.from_numpy(m).countDistinctApprox(0).compute(), [len(np.unique(col))*100 for col in m.T], 10)) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m).countDistinctApprox(0).compute(), + [len(np.unique(col)) * 100 for col in m.T], + 10, + ) + ) def test_countDistinctApprox3(self): distinct = 1000 - m = np.round(np.random.random((100, 10000))*(distinct - 1)) + m = np.round(np.random.random((100, 10000)) * (distinct - 1)) # allow and error of 1% - self.assertTrue(np.allclose( - self.sds.from_numpy(m).countDistinctApprox(1).compute(), np.array([[len(np.unique(col))] for col in m]), 10)) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m).countDistinctApprox(1).compute(), + np.array([[len(np.unique(col))] for col in m]), + 10, + ) + ) def test_countDistinctApprox4(self): m = np.round(np.random.random((2, 2))) with self.assertRaises(ValueError): self.sds.from_numpy(m).countDistinctApprox(2) - def test_countDistinct1(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).countDistinct().compute(), len(np.unique(m1)))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).countDistinct().compute(), len(np.unique(m1)) + ) + ) def test_countDistinct2(self): - self.assertTrue(np.allclose( 
- self.sds.from_numpy(m2).countDistinct().compute(), len(np.unique(m2)))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m2).countDistinct().compute(), len(np.unique(m2)) + ) + ) def test_countDistinct3(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m3).countDistinct().compute(), len(np.unique(m3)))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m3).countDistinct().compute(), len(np.unique(m3)) + ) + ) def test_countDistinct4(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).countDistinct(0).compute(), [len(np.unique(col)) for col in m1.T])) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).countDistinct(0).compute(), + [len(np.unique(col)) for col in m1.T], + ) + ) def test_countDistinct5(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m2).countDistinct(0).compute(), [len(np.unique(col)) for col in m2.T])) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m2).countDistinct(0).compute(), + [len(np.unique(col)) for col in m2.T], + ) + ) def test_countDistinct6(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m3).countDistinct(0).compute(), [len(np.unique(col)) for col in m3.T])) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m3).countDistinct(0).compute(), + [len(np.unique(col)) for col in m3.T], + ) + ) def test_countDistinct7(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).countDistinct(1).compute(), np.array([[len(np.unique(col))] for col in m1]))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).countDistinct(1).compute(), + np.array([[len(np.unique(col))] for col in m1]), + ) + ) def test_countDistinct8(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m2).countDistinct(1).compute(), np.array([[len(np.unique(col))] for col in m2]))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m2).countDistinct(1).compute(), + np.array([[len(np.unique(col))] for col in m2]), + ) + ) def test_countDistinct9(self): - 
self.assertTrue(np.allclose( - self.sds.from_numpy(m3).countDistinct(1).compute(), np.array([[len(np.unique(col))] for col in m3]))) + self.assertTrue( + np.allclose( + self.sds.from_numpy(m3).countDistinct(1).compute(), + np.array([[len(np.unique(col))] for col in m3]), + ) + ) def test_countDistinct10(self): with self.assertRaises(ValueError): self.sds.from_numpy(m3).countDistinct(2) + def test_sd1(self): + self.assertTrue( + np.allclose( + self.sds.from_numpy(m1).sd().compute(), np.std(m1, ddof=1), 1e-9 + ) + ) + + def test_sd2(self): + self.assertTrue( + np.allclose( + self.sds.from_numpy(m2).sd().compute(), np.std(m2, ddof=1), 1e-9 + ) + ) + if __name__ == "__main__": unittest.main(exit=False)