This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new fe4b4a29fb [MINOR] Python API: manual generator option fe4b4a29fb is described below commit fe4b4a29fbefc61be36d18e6c14e30e67b6566ab Author: e-strauss <lathan...@gmx.de> AuthorDate: Wed Sep 4 19:15:38 2024 +0200 [MINOR] Python API: manual generator option This commit adds a manual option for adding algorithm builtin files to the python algorithms folder. Also contained is the auto generated builtin for sliceLine. Closes #2094 --- src/main/python/generator/generator.py | 16 ++++++++++++++++ src/main/python/systemds/operator/algorithm/__init__.py | 2 ++ .../systemds/operator/algorithm/builtin/incSliceLine.py | 2 +- .../algorithm/builtin/{slicefinder.py => sliceLine.py} | 13 +++++++------ .../systemds/operator/algorithm/builtin/slicefinder.py | 5 +++-- 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py index eeed4f4aed..2fbcd49b6b 100644 --- a/src/main/python/generator/generator.py +++ b/src/main/python/generator/generator.py @@ -28,6 +28,9 @@ from dml_parser import FunctionParser from typing import List, Tuple +manually_added_algorithm_builtins = [] + + class PythonAPIFileGenerator(object): target_path = os.path.join(os.path.dirname(os.path.dirname( @@ -53,6 +56,11 @@ class PythonAPIFileGenerator(object): self.extension = '.{extension}'.format(extension=extension) os.makedirs(self.__class__.target_path, exist_ok=True) self.function_names = list() + for name in manually_added_algorithm_builtins: + # only add files which actually exist, to avoid breaking + if self.check_manually_added_file(name + self.extension): + self.function_names.append(name) + path = os.path.dirname(__file__) with open(os.path.join(path, self.__class__.template_path), 'r') as f: @@ -63,6 +71,13 @@ class PythonAPIFileGenerator(object): self.generated_by = "# Autogenerated By : src/main/python/generator/generator.py\n" 
self.generated_from = "# Autogenerated From : " + def check_manually_added_file(self, name: str): + path = os.path.join(self.target_path, name) + exists = os.path.isfile(path) + if not exists: + print("[ERROR] Manually added builtin algorithm not found : \'{file_name}\' \n .".format(file_name=path)) + return exists + def generate_file(self, filename: str, file_content: str, dml_file: str): """ Generates file in self.path with name file_name @@ -389,4 +404,5 @@ if __name__ == "__main__": continue file_generator.generate_file( data["function_name"], script_content, dml_file) + file_generator.function_names.sort() file_generator.generate_init_file() diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py index bdc7d99f52..baf2976e49 100644 --- a/src/main/python/systemds/operator/algorithm/__init__.py +++ b/src/main/python/systemds/operator/algorithm/__init__.py @@ -166,6 +166,7 @@ from .builtin.sherlockPredict import sherlockPredict from .builtin.shortestPath import shortestPath from .builtin.sigmoid import sigmoid from .builtin.skewness import skewness +from .builtin.sliceLine import sliceLine from .builtin.sliceLineDebug import sliceLineDebug from .builtin.slicefinder import slicefinder from .builtin.smape import smape @@ -338,6 +339,7 @@ __all__ = ['WoE', 'shortestPath', 'sigmoid', 'skewness', + 'sliceLine', 'sliceLineDebug', 'slicefinder', 'smape', diff --git a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py index 440101494f..a95302e9d8 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py +++ b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py @@ -36,7 +36,7 @@ def incSliceLine(addedX: Matrix, This builtin function implements incSliceLine, a linear-algebra-based ML model debugging technique for finding the top-k data slices where a trained models performs 
significantly worse than on the overall - dataset. IncSliceLine is designed for scenarios in which training data is updated incrementally. + dataset. IncSliceLine is designed for scenarios in which training data is updated incrementally. For a detailed description of the SliceLine algorithm and experimental results, see: Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging.(SIGMOD 2021) diff --git a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py b/src/main/python/systemds/operator/algorithm/builtin/sliceLine.py similarity index 89% copy from src/main/python/systemds/operator/algorithm/builtin/slicefinder.py copy to src/main/python/systemds/operator/algorithm/builtin/sliceLine.py index 99e6bf415c..873e1110b4 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py +++ b/src/main/python/systemds/operator/algorithm/builtin/sliceLine.py @@ -20,7 +20,7 @@ # ------------------------------------------------------------- # Autogenerated By : src/main/python/generator/generator.py -# Autogenerated From : scripts/builtin/slicefinder.dml +# Autogenerated From : scripts/builtin/sliceLine.dml from typing import Dict, Iterable @@ -29,15 +29,16 @@ from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def slicefinder(X: Matrix, - e: Matrix, - **kwargs: Dict[str, VALID_INPUT_TYPES]): +def sliceLine(X: Matrix, + e: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): """ This builtin function implements SliceLine, a linear-algebra-based ML model debugging technique for finding the top-k data slices where a trained models performs significantly worse than on the overall dataset. 
For a detailed description and experimental results, see: - Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging.(SIGMOD 2021) + Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based + Slice Finding for ML Model Debugging.(SIGMOD 2021) @@ -66,7 +67,7 @@ def slicefinder(X: Matrix, vX_2 = Matrix(X.sds_context, '') output_nodes = [vX_0, vX_1, vX_2, ] - op = MultiReturn(X.sds_context, 'slicefinder', output_nodes, named_input_nodes=params_dict) + op = MultiReturn(X.sds_context, 'sliceLine', output_nodes, named_input_nodes=params_dict) vX_0._unnamed_input_nodes = [op] vX_1._unnamed_input_nodes = [op] diff --git a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py b/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py index 99e6bf415c..1d8a6f98bb 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py +++ b/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py @@ -35,9 +35,10 @@ def slicefinder(X: Matrix, """ This builtin function implements SliceLine, a linear-algebra-based ML model debugging technique for finding the top-k data slices where - a trained models performs significantly worse than on the overall + a trained models performs significantly worse than on the overall dataset. For a detailed description and experimental results, see: - Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging.(SIGMOD 2021) + Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based + Slice Finding for ML Model Debugging.(SIGMOD 2021)