This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new fe4b4a29fb [MINOR] Python API: manual generator option
fe4b4a29fb is described below

commit fe4b4a29fbefc61be36d18e6c14e30e67b6566ab
Author: e-strauss <lathan...@gmx.de>
AuthorDate: Wed Sep 4 19:15:38 2024 +0200

    [MINOR] Python API: manual generator option
    
    This commit adds a manual option for adding
    algorithm builtin files to the python algorithms
    folder.
    
    Also contained is the auto generated builtin for
    sliceLine.
    
    Closes #2094
---
 src/main/python/generator/generator.py                   | 16 ++++++++++++++++
 src/main/python/systemds/operator/algorithm/__init__.py  |  2 ++
 .../systemds/operator/algorithm/builtin/incSliceLine.py  |  2 +-
 .../algorithm/builtin/{slicefinder.py => sliceLine.py}   | 13 +++++++------
 .../systemds/operator/algorithm/builtin/slicefinder.py   |  5 +++--
 5 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py
index eeed4f4aed..2fbcd49b6b 100644
--- a/src/main/python/generator/generator.py
+++ b/src/main/python/generator/generator.py
@@ -28,6 +28,9 @@ from dml_parser import FunctionParser
 from typing import List, Tuple
 
 
+manually_added_algorithm_builtins = []
+
+
 class PythonAPIFileGenerator(object):
 
     target_path = os.path.join(os.path.dirname(os.path.dirname(
@@ -53,6 +56,11 @@ class PythonAPIFileGenerator(object):
         self.extension = '.{extension}'.format(extension=extension)
         os.makedirs(self.__class__.target_path, exist_ok=True)
         self.function_names = list()
+        for name in manually_added_algorithm_builtins:
+            # only add files which actually exist, to avoid breaking
+            if self.check_manually_added_file(name + self.extension):
+                self.function_names.append(name)
+
         path = os.path.dirname(__file__)
 
         with open(os.path.join(path, self.__class__.template_path), 'r') as f:
@@ -63,6 +71,13 @@ class PythonAPIFileGenerator(object):
         self.generated_by = "# Autogenerated By   : src/main/python/generator/generator.py\n"
         self.generated_from = "# Autogenerated From : "
 
+    def check_manually_added_file(self, name: str):
+        path = os.path.join(self.target_path, name)
+        exists = os.path.isfile(path)
+        if not exists:
+            print("[ERROR] Manually added builtin algorithm not found : \'{file_name}\' \n .".format(file_name=path))
+        return exists
+
     def generate_file(self, filename: str, file_content: str, dml_file: str):
         """
         Generates file in self.path with name file_name
@@ -389,4 +404,5 @@ if __name__ == "__main__":
             continue
         file_generator.generate_file(
             data["function_name"], script_content, dml_file)
+    file_generator.function_names.sort()
     file_generator.generate_init_file()
diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index bdc7d99f52..baf2976e49 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -166,6 +166,7 @@ from .builtin.sherlockPredict import sherlockPredict
 from .builtin.shortestPath import shortestPath 
 from .builtin.sigmoid import sigmoid 
 from .builtin.skewness import skewness 
+from .builtin.sliceLine import sliceLine 
 from .builtin.sliceLineDebug import sliceLineDebug 
 from .builtin.slicefinder import slicefinder 
 from .builtin.smape import smape 
@@ -338,6 +339,7 @@ __all__ = ['WoE',
  'shortestPath',
  'sigmoid',
  'skewness',
+ 'sliceLine',
  'sliceLineDebug',
  'slicefinder',
  'smape',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
index 440101494f..a95302e9d8 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/incSliceLine.py
@@ -36,7 +36,7 @@ def incSliceLine(addedX: Matrix,
      This builtin function implements incSliceLine, a linear-algebra-based
      ML model debugging technique for finding the top-k data slices where
      a trained models performs significantly worse than on the overall
-     dataset. IncSliceLine is designed for scenarios in which training data is 
updated incrementally. 
+     dataset. IncSliceLine is designed for scenarios in which training data is 
updated incrementally.
      For a detailed description of the SliceLine algorithm and experimental results, see:
      Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging.(SIGMOD 2021)
     
diff --git a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py b/src/main/python/systemds/operator/algorithm/builtin/sliceLine.py
similarity index 89%
copy from src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
copy to src/main/python/systemds/operator/algorithm/builtin/sliceLine.py
index 99e6bf415c..873e1110b4 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/sliceLine.py
@@ -20,7 +20,7 @@
 # -------------------------------------------------------------
 
 # Autogenerated By   : src/main/python/generator/generator.py
-# Autogenerated From : scripts/builtin/slicefinder.dml
+# Autogenerated From : scripts/builtin/sliceLine.dml
 
 from typing import Dict, Iterable
 
@@ -29,15 +29,16 @@ from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
-def slicefinder(X: Matrix,
-                e: Matrix,
-                **kwargs: Dict[str, VALID_INPUT_TYPES]):
+def sliceLine(X: Matrix,
+              e: Matrix,
+              **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
      This builtin function implements SliceLine, a linear-algebra-based
      ML model debugging technique for finding the top-k data slices where
      a trained models performs significantly worse than on the overall 
      dataset. For a detailed description and experimental results, see:
-     Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging.(SIGMOD 2021)
+     Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
+     Slice Finding for ML Model Debugging.(SIGMOD 2021)
     
     
     
@@ -66,7 +67,7 @@ def slicefinder(X: Matrix,
     vX_2 = Matrix(X.sds_context, '')
     output_nodes = [vX_0, vX_1, vX_2, ]
 
-    op = MultiReturn(X.sds_context, 'slicefinder', output_nodes, named_input_nodes=params_dict)
+    op = MultiReturn(X.sds_context, 'sliceLine', output_nodes, named_input_nodes=params_dict)
 
     vX_0._unnamed_input_nodes = [op]
     vX_1._unnamed_input_nodes = [op]
diff --git a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py b/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
index 99e6bf415c..1d8a6f98bb 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/slicefinder.py
@@ -35,9 +35,10 @@ def slicefinder(X: Matrix,
     """
      This builtin function implements SliceLine, a linear-algebra-based
      ML model debugging technique for finding the top-k data slices where
-     a trained models performs significantly worse than on the overall 
+     a trained models performs significantly worse than on the overall
      dataset. For a detailed description and experimental results, see:
-     Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging.(SIGMOD 2021)
+     Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
+     Slice Finding for ML Model Debugging.(SIGMOD 2021)
     
     
     

Reply via email to