This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit eee15cca444ab777d2955b624b8dd3073ba899c8 Author: baunsgaard <[email protected]> AuthorDate: Fri Dec 3 16:03:17 2021 +0100 [SYSTEMDS-3238] Python GMM test This commit adds a small GMM test in python for outlier/anomaly detection. --- .../test_source_list.py => algorithms/test_gmm.py} | 53 +++++++++++----------- src/main/python/tests/source/test_source_list.py | 2 +- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/main/python/tests/source/test_source_list.py b/src/main/python/tests/algorithms/test_gmm.py similarity index 51% copy from src/main/python/tests/source/test_source_list.py copy to src/main/python/tests/algorithms/test_gmm.py index d4ab391..066c957 100644 --- a/src/main/python/tests/source/test_source_list.py +++ b/src/main/python/tests/algorithms/test_gmm.py @@ -21,15 +21,13 @@ import unittest -import numpy as np from systemds.context import SystemDSContext -from systemds.operator.algorithm.builtin.scale import scale +from systemds.operator.algorithm import gmm, gmmPredict -class TestSource_01(unittest.TestCase): +class TestGMM(unittest.TestCase): sds: SystemDSContext = None - source_path: str = "./tests/source/source_with_list_input.dml" @classmethod def setUpClass(cls): @@ -39,25 +37,28 @@ class TestSource_01(unittest.TestCase): def tearDownClass(cls): cls.sds.close() - def test_single_return(self): - arr = self.sds.array(self.sds.full((10, 10), 4)) - c = self.sds.source(self.source_path, "test").func(arr) - res = c.sum().compute() - self.assertTrue(res == 10*10*4) - - def test_input_multireturn(self): - m = self.sds.full((10, 10), 2) - [a, b, c] = scale(m, True, True) - arr = self.sds.array(a, b, c) - c = self.sds.source(self.source_path, "test").func(arr) - res = c.sum().compute(verbose=True) - self.assertTrue(res == 0) - - # [SYSTEMDS-3224] https://issues.apache.org/jira/browse/SYSTEMDS-3224 - # def test_multi_return(self): - # arr = self.sds.array( - # self.sds.full((10, 10), 4), - # self.sds.full((3, 3), 5)) - # [b, c] = self.sds.source(self.source_path, "test", True).func2(arr) - # res = c.sum().compute() - # self.assertTrue(res == 10*10*4) + def test_lm_simple(self): + a = self.sds.rand(500, 10, -100, 100, pdf="normal", seed=10) + features = a # training data all not outliers + + notOutliers = self.sds.rand(10, 10, -1, 1, seed=10) # inside a + outliers = self.sds.rand(10, 10, 1150, 1200, seed=10) # outliers + + test = outliers.rbind(notOutliers) # testing data half outliers + + n_gaussian = 4 + + [_, _, _, _, mu, precision_cholesky, wight] = gmm( + features, False, n_components=n_gaussian, seed=10) + + [_, pp] = gmmPredict( + test, wight, mu, precision_cholesky, model=self.sds.scalar("VVV")) + + outliers = pp.max(axis=1) < 0.99 + ret = outliers.compute() + + self.assertTrue(ret.sum() == 10) + + +if __name__ == "__main__": + unittest.main(exit=False) diff --git a/src/main/python/tests/source/test_source_list.py b/src/main/python/tests/source/test_source_list.py index d4ab391..02e63e9 100644 --- a/src/main/python/tests/source/test_source_list.py +++ b/src/main/python/tests/source/test_source_list.py @@ -50,7 +50,7 @@ class TestSource_01(unittest.TestCase): [a, b, c] = scale(m, True, True) arr = self.sds.array(a, b, c) c = self.sds.source(self.source_path, "test").func(arr) - res = c.sum().compute(verbose=True) + res = c.sum().compute() self.assertTrue(res == 0) # [SYSTEMDS-3224] https://issues.apache.org/jira/browse/SYSTEMDS-3224
