This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new afcc5d8ad3 [SYSTEMDS-3757] Python API Builtin solve, median, quantile
afcc5d8ad3 is described below
commit afcc5d8ad367d5c61dc08b45ac4904bd1731301b
Author: e-strauss <[email protected]>
AuthorDate: Fri Sep 6 22:10:29 2024 +0200
[SYSTEMDS-3757] Python API Builtin solve, median, quantile
Closes #2102
---
src/main/python/generator/generator.py | 2 +-
.../python/systemds/operator/algorithm/__init__.py | 2 +
.../systemds/operator/algorithm/builtin/solve.py | 36 +++++++
src/main/python/systemds/operator/nodes/matrix.py | 43 +++++++-
src/main/python/tests/algorithms/test_solve.py | 54 ++++++++++
src/main/python/tests/matrix/test_quantile.py | 110 +++++++++++++++++++++
6 files changed, 241 insertions(+), 6 deletions(-)
diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py
index cb502d74c8..b124feff19 100644
--- a/src/main/python/generator/generator.py
+++ b/src/main/python/generator/generator.py
@@ -28,7 +28,7 @@ from dml_parser import FunctionParser
from typing import List, Tuple
-manually_added_algorithm_builtins = ["cov"]
+manually_added_algorithm_builtins = ["cov", "solve"]
class PythonAPIFileGenerator(object):
diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index b3273a818a..fe3d40b34c 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -173,6 +173,7 @@ from .builtin.slicefinder import slicefinder
from .builtin.smape import smape
from .builtin.smote import smote
from .builtin.softmax import softmax
+from .builtin.solve import solve
from .builtin.split import split
from .builtin.splitBalanced import splitBalanced
from .builtin.stableMarriage import stableMarriage
@@ -347,6 +348,7 @@ __all__ = ['WoE',
'smape',
'smote',
'softmax',
+ 'solve',
'split',
'splitBalanced',
'stableMarriage',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/solve.py b/src/main/python/systemds/operator/algorithm/builtin/solve.py
new file mode 100644
index 0000000000..6756e8ff02
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/solve.py
@@ -0,0 +1,36 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+
+from systemds.operator import Matrix, Scalar
+
+
+def solve(a: Matrix, b: Matrix) -> "Matrix":
+ """
+ Computes the least squares solution for system of linear equations A %*% x = b i.e., it finds x such that
+ ||A%*%x – b|| is minimized. The solution vector x is computed using a QR decomposition of A.
+
+ :param a: (m,n) matrix a
+ :param b: (m,1) matrix b
+ :return: (n, 1) matrix x
+ """
+
+ return Matrix(a.sds_context, "solve", [a, b])
diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py
index c02a73e48c..a96f8d884c 100644
--- a/src/main/python/systemds/operator/nodes/matrix.py
+++ b/src/main/python/systemds/operator/nodes/matrix.py
@@ -306,7 +306,7 @@ class Matrix(OperationNode):
"""Calculate the number of distinct values of matrix.
:param axis: can be 0 or 1 to do either row or column aggregation
- :return: `Matrix` representing operation
+ :return: `OperationNode` representing operation
"""
if axis == 0:
return Matrix(self.sds_context, "colCountDistinct", [self])
@@ -321,7 +321,7 @@ class Matrix(OperationNode):
def countDistinctApprox(self, axis: int = None) -> "OperationNode":
"""Calculate the approximate number of distinct values of matrix.
:param axis: can be 0 or 1 to do either row or column aggregation
- :return: `Matrix` representing operation
+ :return: `OperationNode` representing operation
"""
if axis == 0:
return Matrix(self.sds_context, "colCountDistinctApprox", [self])
@@ -337,7 +337,7 @@ class Matrix(OperationNode):
"""Calculate variance of matrix.
:param axis: can be 0 or 1 to do either row or column vars
- :return: `Matrix` representing operation
+ :return: `OperationNode` representing operation
"""
if axis == 0:
return Matrix(self.sds_context, "colVars", [self])
@@ -352,7 +352,7 @@ class Matrix(OperationNode):
def trace(self) -> "Scalar":
"""Calculate trace.
- :return: `Matrix` representing operation
+ :return: `Scalar` representing operation
"""
return Scalar(self.sds_context, "trace", [self])
@@ -381,7 +381,7 @@ class Matrix(OperationNode):
def sd(self) -> "Scalar":
"""Calculate standard deviation of matrix.
- :return: `Matrix` representing operation
+ :return: `Scalar` representing operation
"""
return Scalar(self.sds_context, "sd", [self])
@@ -770,5 +770,38 @@ class Matrix(OperationNode):
)
return op
+ def median(self, weights: "Matrix" = None) -> "Scalar":
+ """Calculate median of a column matrix.
+
+ :return: `Scalar` representing operation
+ """
+ if weights is None:
+ return Scalar(self.sds_context, "median", [self])
+ else:
+ return Scalar(self.sds_context, "median", [self, weights])
+
+ def quantile(self, p, weights: "Matrix" = None) -> "OperationNode":
+ """Returns a column matrix with list of all quantiles requested in P.
+
+ :param p: float for a single quantile or column matrix of requested quantiles
+ :param weights: (optional) weights matrix of the same shape as self
+ :return: `Matrix` or 'Scalar' representing operation
+ """
+ if weights is None:
+ input_nodes = [self, p]
+ else:
+ input_nodes = [self, weights, p]
+
+ if isinstance(p, Matrix):
+ return Matrix(self.sds_context, "quantile", input_nodes)
+ elif isinstance(p, float):
+ if 0.0 <= p <= 1.0:
+ input_nodes[-1] = self.sds_context.scalar(input_nodes[-1])
+ else:
+ raise ValueError("Quantile has to be between 0 and 1")
+ return Scalar(self.sds_context, "quantile", input_nodes)
+ else:
+ raise ValueError("P has to be a Scalar or Matrix")
+
def __str__(self):
return "MatrixNode"
diff --git a/src/main/python/tests/algorithms/test_solve.py b/src/main/python/tests/algorithms/test_solve.py
new file mode 100644
index 0000000000..ef7c331b30
--- /dev/null
+++ b/src/main/python/tests/algorithms/test_solve.py
@@ -0,0 +1,54 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+
+import numpy as np
+
+from systemds.context import SystemDSContext
+from systemds.operator.algorithm import solve
+
+
+np.random.seed(7)
+A = np.random.random((10, 10))
+B = np.random.random(10)
+
+
+class TestSOLVE(unittest.TestCase):
+
+ sds: SystemDSContext = None
+
+ @classmethod
+ def setUpClass(cls):
+ cls.sds = SystemDSContext()
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.sds.close()
+
+ def test_solve(self):
+ sds_result = solve(self.sds.from_numpy(A), self.sds.from_numpy(B)).compute()
+ np_result = np.linalg.solve(A, B).reshape((-1, 1))
+ self.assertTrue(np.allclose(sds_result, np_result, 1e-9))
+
+
+if __name__ == "__main__":
+ unittest.main(exit=False)
diff --git a/src/main/python/tests/matrix/test_quantile.py b/src/main/python/tests/matrix/test_quantile.py
new file mode 100644
index 0000000000..1993090297
--- /dev/null
+++ b/src/main/python/tests/matrix/test_quantile.py
@@ -0,0 +1,110 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import unittest
+import numpy as np
+from systemds.context import SystemDSContext
+
+np.random.seed(7)
+m = np.random.random_integers(9, size=100)
+M = np.random.random_integers(9, size=300).reshape(100, 3)
+p = np.array([0.25, 0.5, 0.75])
+m2 = np.array([1, 2, 3, 4, 5])
+w2 = np.array([1, 1, 1, 1, 5])
+
+
+def weighted_quantiles(values, weights, quantiles=0.5):
+ i = np.argsort(values)
+ c = np.cumsum(weights[i])
+ return values[i[np.searchsorted(c, np.array(quantiles) * c[-1])]]
+
+
+class TestQUANTILE(unittest.TestCase):
+ def setUp(self):
+ self.sds = SystemDSContext()
+
+ def tearDown(self):
+ self.sds.close()
+
+ def test_median_random1(self):
+ sds_input = self.sds.from_numpy(m)
+ sds_result = sds_input.median().compute()
+ np_result = np.median(m)
+ assert np.allclose(sds_result, np_result, 1e-9)
+
+ def test_median_random2(self):
+ with self.assertRaises(RuntimeError):
+ sds_input = self.sds.from_numpy(M)
+ sds_input.median().compute()
+
+ def test_weighted_median(self):
+ sds_input = self.sds.from_numpy(m2)
+ sds_input2 = self.sds.from_numpy(w2)
+ sds_result = sds_input.median(sds_input2).compute()
+ np_result = weighted_quantiles(m2, w2)
+ assert np.allclose(sds_result, np_result, 1e-9)
+
+ def test_quantile1(self):
+ sds_p = self.sds.from_numpy(p)
+ sds_result = self.sds.from_numpy(m).quantile(sds_p).compute()
+ np_result = np.array(
+ [weighted_quantiles(m, np.ones(m.shape), quantiles=q) for q in p]
+ ).reshape(-1, 1)
+ assert np.allclose(sds_result, np_result, 1e-9)
+
+ def test_quantile2(self):
+ sds_p = self.sds.from_numpy(p)
+ sds_result = self.sds.from_numpy(m2).quantile(sds_p).compute()
+ np_result = np.array(
+ [weighted_quantiles(m2, np.ones(m.shape), quantiles=q) for q in p]
+ ).reshape(-1, 1)
+ assert np.allclose(sds_result, np_result, 1e-9)
+
+ def test_quantile3(self):
+ sds_p = self.sds.from_numpy(p)
+ sds_w = self.sds.from_numpy(w2)
+ sds_result = self.sds.from_numpy(m2).quantile(sds_p, sds_w).compute()
+ np_result = np.array(
+ [weighted_quantiles(m2, w2, quantiles=q) for q in p]
+ ).reshape(-1, 1)
+ assert np.allclose(sds_result, np_result, 1e-9)
+
+ def test_quantile4(self):
+ sds_w = self.sds.from_numpy(w2)
+ quant = 0.3
+ sds_result = self.sds.from_numpy(m2).quantile(quant, sds_w).compute()
+ np_result = weighted_quantiles(m2, w2, quantiles=quant)
+ assert np.allclose(sds_result, np_result, 1e-9)
+
+ def test_quantile5(self):
+ sds_w = self.sds.from_numpy(w2)
+ with self.assertRaises(ValueError):
+ self.sds.from_numpy(m2).quantile("0.5", sds_w)
+
+ def test_quantile6(self):
+ sds_w = self.sds.from_numpy(w2)
+ quant = 1.3
+ with self.assertRaises(ValueError):
+ self.sds.from_numpy(m2).quantile(quant, sds_w)
+
+
+if __name__ == "__main__":
+ unittest.main()