This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new fe4b4a29fb [MINOR] Python API: manual generator option
fe4b4a29fb is described below
commit fe4b4a29fbefc61be36d18e6c14e30e67b6566ab
Author: e-strauss <[email protected]>
AuthorDate: Wed Sep 4 19:15:38 2024 +0200
[MINOR] Python API: manual generator option
This commit adds a manual option for adding
algorithm builtin files to the Python algorithms
folder.
It also contains the auto-generated builtin for
sliceLine.
Closes #2094
---
src/main/python/generator/generator.py | 16 ++++++++++++++++
src/main/python/systemds/operator/algorithm/__init__.py | 2 ++
.../systemds/operator/algorithm/builtin/incSliceLine.py | 2 +-
.../algorithm/builtin/{slicefinder.py => sliceLine.py} | 13 +++++++------
.../systemds/operator/algorithm/builtin/slicefinder.py | 5 +++--
5 files changed, 29 insertions(+), 9 deletions(-)
diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py
index eeed4f4aed..2fbcd49b6b 100644
--- a/src/main/python/generator/generator.py
+++ b/src/main/python/generator/generator.py
@@ -28,6 +28,9 @@ from dml_parser import FunctionParser
from typing import List, Tuple
+manually_added_algorithm_builtins = []
+
+
class PythonAPIFileGenerator(object):
target_path = os.path.join(os.path.dirname(os.path.dirname(
@@ -53,6 +56,11 @@ class PythonAPIFileGenerator(object):
self.extension = '.{extension}'.format(extension=extension)
os.makedirs(self.__class__.target_path, exist_ok=True)
self.function_names = list()
+ for name in manually_added_algorithm_builtins:
+ # only add files which actually exist, to avoid breaking
+ if self.check_manually_added_file(name + self.extension):
+ self.function_names.append(name)
+
path = os.path.dirname(__file__)
with open(os.path.join(path, self.__class__.template_path), 'r') as f:
@@ -63,6 +71,13 @@ class PythonAPIFileGenerator(object):
self.generated_by = "# Autogenerated By :
src/main/python/generator/generator.py\n"
self.generated_from = "# Autogenerated From : "
+ def check_manually_added_file(self, name: str):
+ path = os.path.join(self.target_path, name)
+ exists = os.path.isfile(path)
+ if not exists:
+ print("[ERROR] Manually added builtin algorithm not found :
\'{file_name}\' \n .".format(file_name=path))
+ return exists
+
def generate_file(self, filename: str, file_content: str, dml_file: str):
"""
Generates file in self.path with name file_name
@@ -389,4 +404,5 @@ if __name__ == "__main__":
continue
file_generator.generate_file(
data["function_name"], script_content, dml_file)
+ file_generator.function_names.sort()
file_generator.generate_init_file()
diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index bdc7d99f52..baf2976e49 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -166,6 +166,7 @@ from .builtin.sherlockPredict import sherlockPredict
from .builtin.shortestPath import shortestPath
from .builtin.sigmoid import sigmoid
from .builtin.skewness import skewness
+from .builtin.sliceLine import sliceLine
from .builtin.sliceLineDebug import sliceLineDebug
from .builtin.slicefinder import slicefinder
from .builtin.smape import smape
@@ -338,6 +339,7 @@ __all__ = ['WoE',
'shortestPath',
'sigmoid',
'skewness',
+ 'sliceLine',
'sliceLineDebug',
'slicefinder',
'smape',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
index 440101494f..a95302e9d8 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
@@ -36,7 +36,7 @@ def incSliceLine(addedX: Matrix,
This builtin function implements incSliceLine, a linear-algebra-based
ML model debugging technique for finding the top-k data slices where
a trained models performs significantly worse than on the overall
- dataset. IncSliceLine is designed for scenarios in which training data is
updated incrementally.
+ dataset. IncSliceLine is designed for scenarios in which training data is
updated incrementally.
For a detailed description of the SliceLine algorithm and experimental
results, see:
Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
Slice Finding for ML Model Debugging.(SIGMOD 2021)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py b/src/main/python/systemds/operator/algorithm/builtin/sliceLine.py
similarity index 89%
copy from src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
copy to src/main/python/systemds/operator/algorithm/builtin/sliceLine.py
index 99e6bf415c..873e1110b4 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/sliceLine.py
@@ -20,7 +20,7 @@
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/slicefinder.dml
+# Autogenerated From : scripts/builtin/sliceLine.dml
from typing import Dict, Iterable
@@ -29,15 +29,16 @@ from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
-def slicefinder(X: Matrix,
- e: Matrix,
- **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def sliceLine(X: Matrix,
+ e: Matrix,
+ **kwargs: Dict[str, VALID_INPUT_TYPES]):
"""
This builtin function implements SliceLine, a linear-algebra-based
ML model debugging technique for finding the top-k data slices where
a trained models performs significantly worse than on the overall
dataset. For a detailed description and experimental results, see:
- Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
Slice Finding for ML Model Debugging.(SIGMOD 2021)
+ Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
+ Slice Finding for ML Model Debugging.(SIGMOD 2021)
@@ -66,7 +67,7 @@ def slicefinder(X: Matrix,
vX_2 = Matrix(X.sds_context, '')
output_nodes = [vX_0, vX_1, vX_2, ]
- op = MultiReturn(X.sds_context, 'slicefinder', output_nodes,
named_input_nodes=params_dict)
+ op = MultiReturn(X.sds_context, 'sliceLine', output_nodes,
named_input_nodes=params_dict)
vX_0._unnamed_input_nodes = [op]
vX_1._unnamed_input_nodes = [op]
diff --git a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py b/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
index 99e6bf415c..1d8a6f98bb 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
@@ -35,9 +35,10 @@ def slicefinder(X: Matrix,
"""
This builtin function implements SliceLine, a linear-algebra-based
ML model debugging technique for finding the top-k data slices where
- a trained models performs significantly worse than on the overall
+ a trained models performs significantly worse than on the overall
dataset. For a detailed description and experimental results, see:
- Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
Slice Finding for ML Model Debugging.(SIGMOD 2021)
+ Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
+ Slice Finding for ML Model Debugging.(SIGMOD 2021)