This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new afcc5d8ad3 [SYSTEMDS-3757] Python API Builtin solve, median, quantile
afcc5d8ad3 is described below

commit afcc5d8ad367d5c61dc08b45ac4904bd1731301b
Author: e-strauss <lathan...@gmx.de>
AuthorDate: Fri Sep 6 22:10:29 2024 +0200

    [SYSTEMDS-3757] Python API Builtin solve, median, quantile
    
    Closes #2102
---
 src/main/python/generator/generator.py             |   2 +-
 .../python/systemds/operator/algorithm/__init__.py |   2 +
 .../systemds/operator/algorithm/builtin/solve.py   |  36 +++++++
 src/main/python/systemds/operator/nodes/matrix.py  |  43 +++++++-
 src/main/python/tests/algorithms/test_solve.py     |  54 ++++++++++
 src/main/python/tests/matrix/test_quantile.py      | 110 +++++++++++++++++++++
 6 files changed, 241 insertions(+), 6 deletions(-)

diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py
index cb502d74c8..b124feff19 100644
--- a/src/main/python/generator/generator.py
+++ b/src/main/python/generator/generator.py
@@ -28,7 +28,7 @@ from dml_parser import FunctionParser
 from typing import List, Tuple
 
 
-manually_added_algorithm_builtins = ["cov"]
+manually_added_algorithm_builtins = ["cov", "solve"]
 
 
 class PythonAPIFileGenerator(object):
diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index b3273a818a..fe3d40b34c 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -173,6 +173,7 @@ from .builtin.slicefinder import slicefinder
 from .builtin.smape import smape 
 from .builtin.smote import smote 
 from .builtin.softmax import softmax 
+from .builtin.solve import solve 
 from .builtin.split import split 
 from .builtin.splitBalanced import splitBalanced 
 from .builtin.stableMarriage import stableMarriage 
@@ -347,6 +348,7 @@ __all__ = ['WoE',
  'smape',
  'smote',
  'softmax',
+ 'solve',
  'split',
  'splitBalanced',
  'stableMarriage',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/solve.py b/src/main/python/systemds/operator/algorithm/builtin/solve.py
new file mode 100644
index 0000000000..6756e8ff02
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/solve.py
@@ -0,0 +1,36 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+
+from systemds.operator import Matrix, Scalar
+
+
+def solve(a: Matrix, b: Matrix) -> "Matrix":
+    """
+     Computes the least squares solution for system of linear equations A %*% x = b i.e., it finds x such that
+     ||A%*%x – b|| is minimized. The solution vector x is computed using a QR decomposition of A.
+
+    :param a: (m,n) matrix a
+    :param b: (m,1) matrix b
+    :return: (n, 1) matrix x
+    """
+
+    return Matrix(a.sds_context, "solve", [a, b])
diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py
index c02a73e48c..a96f8d884c 100644
--- a/src/main/python/systemds/operator/nodes/matrix.py
+++ b/src/main/python/systemds/operator/nodes/matrix.py
@@ -306,7 +306,7 @@ class Matrix(OperationNode):
         """Calculate the number of distinct values of matrix.
 
         :param axis: can be 0 or 1 to do either row or column aggregation
-        :return: `Matrix` representing operation
+        :return: `OperationNode` representing operation
         """
         if axis == 0:
             return Matrix(self.sds_context, "colCountDistinct", [self])
@@ -321,7 +321,7 @@ class Matrix(OperationNode):
     def countDistinctApprox(self, axis: int = None) -> "OperationNode":
         """Calculate the approximate number of distinct values of matrix.
         :param axis: can be 0 or 1 to do either row or column aggregation
-        :return: `Matrix` representing operation
+        :return: `OperationNode` representing operation
         """
         if axis == 0:
             return Matrix(self.sds_context, "colCountDistinctApprox", [self])
@@ -337,7 +337,7 @@ class Matrix(OperationNode):
         """Calculate variance of matrix.
 
         :param axis: can be 0 or 1 to do either row or column vars
-        :return: `Matrix` representing operation
+        :return: `OperationNode` representing operation
         """
         if axis == 0:
             return Matrix(self.sds_context, "colVars", [self])
@@ -352,7 +352,7 @@ class Matrix(OperationNode):
     def trace(self) -> "Scalar":
         """Calculate trace.
 
-        :return: `Matrix` representing operation
+        :return: `Scalar` representing operation
         """
         return Scalar(self.sds_context, "trace", [self])
 
@@ -381,7 +381,7 @@ class Matrix(OperationNode):
     def sd(self) -> "Scalar":
         """Calculate standard deviation of matrix.
 
-        :return: `Matrix` representing operation
+        :return: `Scalar` representing operation
         """
         return Scalar(self.sds_context, "sd", [self])
 
@@ -770,5 +770,38 @@ class Matrix(OperationNode):
         )
         return op
 
+    def median(self, weights: "Matrix" = None) -> "Scalar":
+        """Calculate median of a column matrix.
+
+        :return: `Scalar` representing operation
+        """
+        if weights is None:
+            return Scalar(self.sds_context, "median", [self])
+        else:
+            return Scalar(self.sds_context, "median", [self, weights])
+
+    def quantile(self, p, weights: "Matrix" = None) -> "OperationNode":
+        """Returns a column matrix with list of all quantiles requested in P.
+
+        :param p: float for a single quantile or column matrix of requested quantiles
+        :param weights: (optional) weights matrix of the same shape as self
+        :return: `Matrix` or 'Scalar' representing operation
+        """
+        if weights is None:
+            input_nodes = [self, p]
+        else:
+            input_nodes = [self, weights, p]
+
+        if isinstance(p, Matrix):
+            return Matrix(self.sds_context, "quantile", input_nodes)
+        elif isinstance(p, float):
+            if 0.0 <= p <= 1.0:
+                input_nodes[-1] = self.sds_context.scalar(input_nodes[-1])
+            else:
+                raise ValueError("Quantile has to be between 0 and 1")
+            return Scalar(self.sds_context, "quantile", input_nodes)
+        else:
+            raise ValueError("P has to be a Scalar or Matrix")
+
     def __str__(self):
         return "MatrixNode"
diff --git a/src/main/python/tests/algorithms/test_solve.py b/src/main/python/tests/algorithms/test_solve.py
new file mode 100644
index 0000000000..ef7c331b30
--- /dev/null
+++ b/src/main/python/tests/algorithms/test_solve.py
@@ -0,0 +1,54 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+
+import numpy as np
+
+from systemds.context import SystemDSContext
+from systemds.operator.algorithm import solve
+
+
+np.random.seed(7)
+A = np.random.random((10, 10))
+B = np.random.random(10)
+
+
+class TestSOLVE(unittest.TestCase):
+
+    sds: SystemDSContext = None
+
+    @classmethod
+    def setUpClass(cls):
+        cls.sds = SystemDSContext()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.sds.close()
+
+    def test_solve(self):
+        sds_result = solve(self.sds.from_numpy(A), self.sds.from_numpy(B)).compute()
+        np_result = np.linalg.solve(A, B).reshape((-1, 1))
+        self.assertTrue(np.allclose(sds_result, np_result, 1e-9))
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
diff --git a/src/main/python/tests/matrix/test_quantile.py b/src/main/python/tests/matrix/test_quantile.py
new file mode 100644
index 0000000000..1993090297
--- /dev/null
+++ b/src/main/python/tests/matrix/test_quantile.py
@@ -0,0 +1,110 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+
+np.random.seed(7)
+m = np.random.random_integers(9, size=100)
+M = np.random.random_integers(9, size=300).reshape(100, 3)
+p = np.array([0.25, 0.5, 0.75])
+m2 = np.array([1, 2, 3, 4, 5])
+w2 = np.array([1, 1, 1, 1, 5])
+
+
+def weighted_quantiles(values, weights, quantiles=0.5):
+    i = np.argsort(values)
+    c = np.cumsum(weights[i])
+    return values[i[np.searchsorted(c, np.array(quantiles) * c[-1])]]
+
+
+class TestQUANTILE(unittest.TestCase):
+    def setUp(self):
+        self.sds = SystemDSContext()
+
+    def tearDown(self):
+        self.sds.close()
+
+    def test_median_random1(self):
+        sds_input = self.sds.from_numpy(m)
+        sds_result = sds_input.median().compute()
+        np_result = np.median(m)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_median_random2(self):
+        with self.assertRaises(RuntimeError):
+            sds_input = self.sds.from_numpy(M)
+            sds_input.median().compute()
+
+    def test_weighted_median(self):
+        sds_input = self.sds.from_numpy(m2)
+        sds_input2 = self.sds.from_numpy(w2)
+        sds_result = sds_input.median(sds_input2).compute()
+        np_result = weighted_quantiles(m2, w2)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_quantile1(self):
+        sds_p = self.sds.from_numpy(p)
+        sds_result = self.sds.from_numpy(m).quantile(sds_p).compute()
+        np_result = np.array(
+            [weighted_quantiles(m, np.ones(m.shape), quantiles=q) for q in p]
+        ).reshape(-1, 1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_quantile2(self):
+        sds_p = self.sds.from_numpy(p)
+        sds_result = self.sds.from_numpy(m2).quantile(sds_p).compute()
+        np_result = np.array(
+            [weighted_quantiles(m2, np.ones(m.shape), quantiles=q) for q in p]
+        ).reshape(-1, 1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_quantile3(self):
+        sds_p = self.sds.from_numpy(p)
+        sds_w = self.sds.from_numpy(w2)
+        sds_result = self.sds.from_numpy(m2).quantile(sds_p, sds_w).compute()
+        np_result = np.array(
+            [weighted_quantiles(m2, w2, quantiles=q) for q in p]
+        ).reshape(-1, 1)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_quantile4(self):
+        sds_w = self.sds.from_numpy(w2)
+        quant = 0.3
+        sds_result = self.sds.from_numpy(m2).quantile(quant, sds_w).compute()
+        np_result = weighted_quantiles(m2, w2, quantiles=quant)
+        assert np.allclose(sds_result, np_result, 1e-9)
+
+    def test_quantile5(self):
+        sds_w = self.sds.from_numpy(w2)
+        with self.assertRaises(ValueError):
+            self.sds.from_numpy(m2).quantile("0.5", sds_w)
+
+    def test_quantile6(self):
+        sds_w = self.sds.from_numpy(w2)
+        quant = 1.3
+        with self.assertRaises(ValueError):
+            self.sds.from_numpy(m2).quantile(quant, sds_w)
+
+
+if __name__ == "__main__":
+    unittest.main()

Reply via email to