This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new c50454930d [MINOR] Update Python API
c50454930d is described below
commit c50454930d7f43d6d46ca1e658efb6064d9e3f4e
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Mon Dec 30 14:08:49 2024 +0100
[MINOR] Update Python API
---
.../python/systemds/operator/algorithm/__init__.py | 2 +
.../systemds/operator/algorithm/builtin/adasyn.py | 62 ++++++++++++++++++++++
.../operator/algorithm/builtin/incSliceLine.py | 5 +-
3 files changed, 67 insertions(+), 2 deletions(-)
diff --git a/src/main/python/systemds/operator/algorithm/__init__.py
b/src/main/python/systemds/operator/algorithm/__init__.py
index fe3d40b34c..95eb5dd207 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -24,6 +24,7 @@
from .builtin.WoE import WoE
from .builtin.WoEApply import WoEApply
from .builtin.abstain import abstain
+from .builtin.adasyn import adasyn
from .builtin.als import als
from .builtin.alsCG import alsCG
from .builtin.alsDS import alsDS
@@ -199,6 +200,7 @@ from .builtin.xgboostPredictRegression import
xgboostPredictRegression
__all__ = ['WoE',
'WoEApply',
'abstain',
+ 'adasyn',
'als',
'alsCG',
'alsDS',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/adasyn.py
b/src/main/python/systemds/operator/algorithm/builtin/adasyn.py
new file mode 100644
index 0000000000..a45c3c9625
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/adasyn.py
@@ -0,0 +1,62 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/adasyn.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn,
Scalar
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def adasyn(X: Matrix,
+ Y: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
+ """
+ Builtin function for handling class imbalance using Adaptive Synthetic
Sampling (ADASYN)
+ by Haibo He et al. In International Joint Conference on Neural Networks
(2008). 1322-1328
+
+
+
+ :param X: Feature matrix [shape: n-by-m]
+ :param Y: Class labels [shape: n-by-1]
+ :param k: Number of nearest neighbors
+ :param beta: Desired balance level after generation of synthetic data [0,
1]
+ :param dth: Distribution threshold
+ :param seed: Seed for randomized data point selection
+ :return: Feature matrix of n original rows followed by G = (ml-ms)*beta
synthetic rows
+ :return: Class labels aligned with output X
+ """
+
+ params_dict = {'X': X, 'Y': Y}
+ params_dict.update(kwargs)
+
+ vX_0 = Matrix(X.sds_context, '')
+ vX_1 = Matrix(X.sds_context, '')
+ output_nodes = [vX_0, vX_1, ]
+
+ op = MultiReturn(X.sds_context, 'adasyn', output_nodes,
named_input_nodes=params_dict)
+
+ vX_0._unnamed_input_nodes = [op]
+ vX_1._unnamed_input_nodes = [op]
+
+ return op
diff --git
a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
index e3bb1889f4..f49dbcda41 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
@@ -65,8 +65,9 @@ def incSliceLine(addedX: Matrix,
:param prevTK: previous top-k slices (for incremental updates)
:param prevTKC: previous top-k scores (for incremental updates)
:param encodeLat: flag for encoding output lattice for less memory
consumption
- :param pruningStrat: flag for disabling certain pruning strategies
- (0 all, 1 all exact (score and size), 2 no score, 3 no size, 4 none)
+ :param pruningStrat: pruning strategy: 0 all pruning, 1 all exact pruning,
+ 2 only score pruning, 3 only max score pruning,
+ 4 only size pruning, 5 no pruning
:return: top-k slices (k x ncol(totalX) if successful)
:return: score, size, error of slices (k x 3)
:return: debug matrix, populated with enumeration stats if verbose