This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new afcc5d8ad3 [SYSTEMDS-3757] Python API Builtin solve, median, quantile afcc5d8ad3 is described below commit afcc5d8ad367d5c61dc08b45ac4904bd1731301b Author: e-strauss <lathan...@gmx.de> AuthorDate: Fri Sep 6 22:10:29 2024 +0200 [SYSTEMDS-3757] Python API Builtin solve, median, quantile Closes #2102 --- src/main/python/generator/generator.py | 2 +- .../python/systemds/operator/algorithm/__init__.py | 2 + .../systemds/operator/algorithm/builtin/solve.py | 36 +++++++ src/main/python/systemds/operator/nodes/matrix.py | 43 +++++++- src/main/python/tests/algorithms/test_solve.py | 54 ++++++++++ src/main/python/tests/matrix/test_quantile.py | 110 +++++++++++++++++++++ 6 files changed, 241 insertions(+), 6 deletions(-) diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py index cb502d74c8..b124feff19 100644 --- a/src/main/python/generator/generator.py +++ b/src/main/python/generator/generator.py @@ -28,7 +28,7 @@ from dml_parser import FunctionParser from typing import List, Tuple -manually_added_algorithm_builtins = ["cov"] +manually_added_algorithm_builtins = ["cov", "solve"] class PythonAPIFileGenerator(object): diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py index b3273a818a..fe3d40b34c 100644 --- a/src/main/python/systemds/operator/algorithm/__init__.py +++ b/src/main/python/systemds/operator/algorithm/__init__.py @@ -173,6 +173,7 @@ from .builtin.slicefinder import slicefinder from .builtin.smape import smape from .builtin.smote import smote from .builtin.softmax import softmax +from .builtin.solve import solve from .builtin.split import split from .builtin.splitBalanced import splitBalanced from .builtin.stableMarriage import stableMarriage @@ -347,6 +348,7 @@ __all__ = ['WoE', 'smape', 'smote', 'softmax', + 'solve', 'split', 'splitBalanced', 'stableMarriage', diff --git a/src/main/python/systemds/operator/algorithm/builtin/solve.py b/src/main/python/systemds/operator/algorithm/builtin/solve.py new file mode 100644 index 0000000000..6756e8ff02 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/solve.py @@ -0,0 +1,36 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + + +from systemds.operator import Matrix, Scalar + + +def solve(a: Matrix, b: Matrix) -> "Matrix": + """ + Computes the least squares solution for system of linear equations A %*% x = b i.e., it finds x such that + ||A%*%x – b|| is minimized. The solution vector x is computed using a QR decomposition of A. + + :param a: (m,n) matrix a + :param b: (m,1) matrix b + :return: (n, 1) matrix x + """ + + return Matrix(a.sds_context, "solve", [a, b]) diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py index c02a73e48c..a96f8d884c 100644 --- a/src/main/python/systemds/operator/nodes/matrix.py +++ b/src/main/python/systemds/operator/nodes/matrix.py @@ -306,7 +306,7 @@ class Matrix(OperationNode): """Calculate the number of distinct values of matrix. :param axis: can be 0 or 1 to do either row or column aggregation - :return: `Matrix` representing operation + :return: `OperationNode` representing operation """ if axis == 0: return Matrix(self.sds_context, "colCountDistinct", [self]) @@ -321,7 +321,7 @@ class Matrix(OperationNode): def countDistinctApprox(self, axis: int = None) -> "OperationNode": """Calculate the approximate number of distinct values of matrix. :param axis: can be 0 or 1 to do either row or column aggregation - :return: `Matrix` representing operation + :return: `OperationNode` representing operation """ if axis == 0: return Matrix(self.sds_context, "colCountDistinctApprox", [self]) @@ -337,7 +337,7 @@ class Matrix(OperationNode): """Calculate variance of matrix. :param axis: can be 0 or 1 to do either row or column vars - :return: `Matrix` representing operation + :return: `OperationNode` representing operation """ if axis == 0: return Matrix(self.sds_context, "colVars", [self]) @@ -352,7 +352,7 @@ class Matrix(OperationNode): def trace(self) -> "Scalar": """Calculate trace. - :return: `Matrix` representing operation + :return: `Scalar` representing operation """ return Scalar(self.sds_context, "trace", [self]) @@ -381,7 +381,7 @@ class Matrix(OperationNode): def sd(self) -> "Scalar": """Calculate standard deviation of matrix. - :return: `Matrix` representing operation + :return: `Scalar` representing operation """ return Scalar(self.sds_context, "sd", [self]) @@ -770,5 +770,38 @@ class Matrix(OperationNode): ) return op + def median(self, weights: "Matrix" = None) -> "Scalar": + """Calculate median of a column matrix. + + :return: `Scalar` representing operation + """ + if weights is None: + return Scalar(self.sds_context, "median", [self]) + else: + return Scalar(self.sds_context, "median", [self, weights]) + + def quantile(self, p, weights: "Matrix" = None) -> "OperationNode": + """Returns a column matrix with list of all quantiles requested in P. + + :param p: float for a single quantile or column matrix of requested quantiles + :param weights: (optional) weights matrix of the same shape as self + :return: `Matrix` or 'Scalar' representing operation + """ + if weights is None: + input_nodes = [self, p] + else: + input_nodes = [self, weights, p] + + if isinstance(p, Matrix): + return Matrix(self.sds_context, "quantile", input_nodes) + elif isinstance(p, float): + if 0.0 <= p <= 1.0: + input_nodes[-1] = self.sds_context.scalar(input_nodes[-1]) + else: + raise ValueError("Quantile has to be between 0 and 1") + return Scalar(self.sds_context, "quantile", input_nodes) + else: + raise ValueError("P has to be a Scalar or Matrix") + def __str__(self): return "MatrixNode" diff --git a/src/main/python/tests/algorithms/test_solve.py b/src/main/python/tests/algorithms/test_solve.py new file mode 100644 index 0000000000..ef7c331b30 --- /dev/null +++ b/src/main/python/tests/algorithms/test_solve.py @@ -0,0 +1,54 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +import unittest + +import numpy as np + +from systemds.context import SystemDSContext +from systemds.operator.algorithm import solve + + +np.random.seed(7) +A = np.random.random((10, 10)) +B = np.random.random(10) + + +class TestSOLVE(unittest.TestCase): + + sds: SystemDSContext = None + + @classmethod + def setUpClass(cls): + cls.sds = SystemDSContext() + + @classmethod + def tearDownClass(cls): + cls.sds.close() + + def test_solve(self): + sds_result = solve(self.sds.from_numpy(A), self.sds.from_numpy(B)).compute() + np_result = np.linalg.solve(A, B).reshape((-1, 1)) + self.assertTrue(np.allclose(sds_result, np_result, 1e-9)) + + +if __name__ == "__main__": + unittest.main(exit=False) diff --git a/src/main/python/tests/matrix/test_quantile.py b/src/main/python/tests/matrix/test_quantile.py new file mode 100644 index 0000000000..1993090297 --- /dev/null +++ b/src/main/python/tests/matrix/test_quantile.py @@ -0,0 +1,110 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +import unittest +import numpy as np +from systemds.context import SystemDSContext + +np.random.seed(7) +m = np.random.random_integers(9, size=100) +M = np.random.random_integers(9, size=300).reshape(100, 3) +p = np.array([0.25, 0.5, 0.75]) +m2 = np.array([1, 2, 3, 4, 5]) +w2 = np.array([1, 1, 1, 1, 5]) + + +def weighted_quantiles(values, weights, quantiles=0.5): + i = np.argsort(values) + c = np.cumsum(weights[i]) + return values[i[np.searchsorted(c, np.array(quantiles) * c[-1])]] + + +class TestQUANTILE(unittest.TestCase): + def setUp(self): + self.sds = SystemDSContext() + + def tearDown(self): + self.sds.close() + + def test_median_random1(self): + sds_input = self.sds.from_numpy(m) + sds_result = sds_input.median().compute() + np_result = np.median(m) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_median_random2(self): + with self.assertRaises(RuntimeError): + sds_input = self.sds.from_numpy(M) + sds_input.median().compute() + + def test_weighted_median(self): + sds_input = self.sds.from_numpy(m2) + sds_input2 = self.sds.from_numpy(w2) + sds_result = sds_input.median(sds_input2).compute() + np_result = weighted_quantiles(m2, w2) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_quantile1(self): + sds_p = self.sds.from_numpy(p) + sds_result = self.sds.from_numpy(m).quantile(sds_p).compute() + np_result = np.array( + [weighted_quantiles(m, np.ones(m.shape), quantiles=q) for q in p] + ).reshape(-1, 1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_quantile2(self): + sds_p = self.sds.from_numpy(p) + sds_result = self.sds.from_numpy(m2).quantile(sds_p).compute() + np_result = np.array( + [weighted_quantiles(m2, np.ones(m.shape), quantiles=q) for q in p] + ).reshape(-1, 1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_quantile3(self): + sds_p = self.sds.from_numpy(p) + sds_w = self.sds.from_numpy(w2) + sds_result = self.sds.from_numpy(m2).quantile(sds_p, sds_w).compute() + np_result = np.array( + [weighted_quantiles(m2, w2, quantiles=q) for q in p] + ).reshape(-1, 1) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_quantile4(self): + sds_w = self.sds.from_numpy(w2) + quant = 0.3 + sds_result = self.sds.from_numpy(m2).quantile(quant, sds_w).compute() + np_result = weighted_quantiles(m2, w2, quantiles=quant) + assert np.allclose(sds_result, np_result, 1e-9) + + def test_quantile5(self): + sds_w = self.sds.from_numpy(w2) + with self.assertRaises(ValueError): + self.sds.from_numpy(m2).quantile("0.5", sds_w) + + def test_quantile6(self): + sds_w = self.sds.from_numpy(w2) + quant = 1.3 + with self.assertRaises(ValueError): + self.sds.from_numpy(m2).quantile(quant, sds_w) + + +if __name__ == "__main__": + unittest.main()