[systemds] 01/02: [SYSTEMDS-3464] Python Combine Write

baunsgaard Tue, 15 Nov 2022 06:49:11 -0800

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


commit 15e278a257ead5cf42891d2970664ba5f0682e80
Author: baunsgaard <[email protected]>
AuthorDate: Tue Nov 15 15:19:06 2022 +0100

    [SYSTEMDS-3464] Python Combine Write
    
    This commit finally adds a long-wanted feature to Python scripts.
    This allows us to call multiple end nodes in a script without having
    to do multiple executions.
    
    Combine(Write(X,"Path1"), Write(Y,"Path2")).compute()
    
    Similarly we can do:
    
    Combine(Print(X), Write(X,"Path1")).compute()
    
    Closes #1729
---
 .../docs/source/code/guide/end_to_end/part2.py     | 26 ++++++++---
 .../docs/source/guide/python_end_to_end_tut.rst    | 34 +++++++++-----
 .../python/systemds/context/systemds_context.py    | 15 ++++++-
 src/main/python/systemds/operator/__init__.py      |  6 ++-
 src/main/python/systemds/operator/nodes/combine.py | 52 ++++++++++++++++++++++
 5 files changed, 114 insertions(+), 19 deletions(-)

diff --git a/src/main/python/docs/source/code/guide/end_to_end/part2.py 
b/src/main/python/docs/source/code/guide/end_to_end/part2.py
index 73c7a6539f..a408e2796a 100644
--- a/src/main/python/docs/source/code/guide/end_to_end/part2.py
+++ b/src/main/python/docs/source/code/guide/end_to_end/part2.py
@@ -40,15 +40,11 @@ with SystemDSContext() as sds:
 
     # Transform frames to matrices.
     X, M1 = X_frame.transform_encode(spec=jspec_data)
-    Xt = Xt_frame.transform_apply(spec=jspec_data, meta=M1)
     Y, M2 = Y_frame.transform_encode(spec=jspec_labels)
-    Yt = Yt_frame.transform_apply(spec=jspec_labels, meta=M2)
 
     # Subsample to make training faster
     X = X[0:train_count]
     Y = Y[0:train_count]
-    Xt = Xt[0:test_count]
-    Yt = Yt[0:test_count]
 
     # Load custom neural network
     neural_net_src_path = "tests/examples/tutorials/neural_net_source.dml"
@@ -60,6 +56,26 @@ with SystemDSContext() as sds:
     seed = 42
 
     network = FFN_package.train(X, Y, epochs, batch_size, learning_rate, seed)
+    
+    # Write metadata and trained network to disk.
+    sds.combine(
+        network.write('tests/examples/docs_test/end_to_end/network'),
+        M1.write('tests/examples/docs_test/end_to_end/encode_X'),
+        M2.write('tests/examples/docs_test/end_to_end/encode_Y')
+        ).compute()
 
-    network.write('tests/examples/docs_test/end_to_end/').compute()
+    # Read metadata and trained network and do prediction.
+    M1_r = sds.read('tests/examples/docs_test/end_to_end/encode_X')
+    M2_r = sds.read('tests/examples/docs_test/end_to_end/encode_Y')
+    network_r = sds.read('tests/examples/docs_test/end_to_end/network')
+    Xt = Xt_frame.transform_apply(spec=jspec_data, meta=M1_r)
+    Yt = Yt_frame.transform_apply(spec=jspec_labels, meta=M2_r)
+    Xt = Xt[0:test_count]
+    Yt = Yt[0:test_count]
+    FFN_package_2 = sds.source(neural_net_src_path, "fnn")
+    probs = FFN_package_2.predict(Xt, network_r)
+    accuracy = FFN_package_2.eval(probs, Yt).compute()
 
+    import logging
+    logging.info("accuracy: " + str(accuracy))
+    
diff --git a/src/main/python/docs/source/guide/python_end_to_end_tut.rst 
b/src/main/python/docs/source/guide/python_end_to_end_tut.rst
index 2a8cb9fb76..961b47d61b 100644
--- a/src/main/python/docs/source/guide/python_end_to_end_tut.rst
+++ b/src/main/python/docs/source/guide/python_end_to_end_tut.rst
@@ -118,12 +118,13 @@ For this we will introduce another dml file, which can be 
used to train a basic
 Step 1: Obtain data
 ~~~~~~~~~~~~~~~~~~~
 
-For the whole data setup please refer to level 1, Step 1, as these steps are 
identical.
+For the whole data setup please refer to level 1, Step 1, as these steps are 
almost identical,
+but instead of preparing the test data, we only prepare the training data.
 
 .. include:: ../code/guide/end_to_end/part2.py
   :code: python
   :start-line: 20
-  :end-line: 51
+  :end-line: 47
 
 Step 2: Load the algorithm
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -134,12 +135,10 @@ This file includes all the necessary functions for 
training, evaluating, and sto
 The returned object of the source call is further used for calling the 
functions.
 The file can be found here:
 
-    - :doc:tests/examples/tutorials/neural_net_source.dml
-
 .. include:: ../code/guide/end_to_end/part2.py
   :code: python
-  :start-line: 54
-  :end-line: 55
+  :start-line: 48
+  :end-line: 51
 
 
 Step 3: Training the neural network
@@ -153,8 +152,8 @@ The seed argument ensures that running the code again 
yields the same results.
 
 .. include:: ../code/guide/end_to_end/part2.py
   :code: python
-  :start-line: 61
-  :end-line: 62
+  :start-line: 52
+  :end-line: 58
 
 
 Step 4: Saving the model
@@ -163,15 +162,28 @@ Step 4: Saving the model
 For later usage, we can save the trained model.
 We only need to specify the name of our model and the file path.
 This call stores the weights and biases of our model.
+Similarly, the transformation metadata needed to transform input data for the
+model is saved.
 
 .. include:: ../code/guide/end_to_end/part2.py
   :code: python
-  :start-line: 64
+  :start-line: 59
   :end-line: 65
 
+Step 5: Predict on Unseen data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Once the model is saved along with metadata, it is simple to apply it all to
+unseen data:
+
+.. include:: ../code/guide/end_to_end/part2.py
+  :code: python
+  :start-line: 66
+  :end-line: 77
+
 
 Full Script NN
-~~~~~~~~~~~---
+~~~~~~~~~~~~~~
 
 The complete script now can be seen here:
 
@@ -179,4 +191,4 @@ The complete script now can be seen here:
 .. include:: ../code/guide/end_to_end/part2.py
   :code: python
   :start-line: 20
-  :end-line: 64
+  :end-line: 80
diff --git a/src/main/python/systemds/context/systemds_context.py 
b/src/main/python/systemds/context/systemds_context.py
index 8543e83417..a4c89d557e 100644
--- a/src/main/python/systemds/context/systemds_context.py
+++ b/src/main/python/systemds/context/systemds_context.py
@@ -38,7 +38,7 @@ import numpy as np
 import pandas as pd
 from py4j.java_gateway import GatewayParameters, JavaGateway, Py4JNetworkError
 from systemds.operator import (Frame, List, Matrix, OperationNode, Scalar,
-                               Source)
+                               Source, Combine)
 from systemds.script_building import DMLScript, OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 from systemds.utils.helpers import get_module_dir
@@ -630,6 +630,19 @@ class SystemDSContext(object):
         """
         return List(self, unnamed_input_nodes=args, named_input_nodes=kwargs)
 
+    def combine(self, *args: Sequence[VALID_INPUT_TYPES]) -> Combine:
+        """ combine nodes to call compute on multiple operations.
+
+        This is useful for the case of having multiple writes in one script 
and wanting 
+        to execute all in one execution reusing intermediates.
+
+        Note that combine does not return anything to the user, so if 
used,
+        please only use nodes that end with either writing or printing 
elements.
+
+        :param args: A sequence that will be executed with call to compute() 
+        """
+        return Combine(self, unnamed_input_nodes=args)
+
     def array(self, *args: Sequence[VALID_INPUT_TYPES]) -> List:
         """ Create a List object containing the given nodes.
 
diff --git a/src/main/python/systemds/operator/__init__.py 
b/src/main/python/systemds/operator/__init__.py
index c1577441c8..51a586bc63 100644
--- a/src/main/python/systemds/operator/__init__.py
+++ b/src/main/python/systemds/operator/__init__.py
@@ -20,13 +20,15 @@
 # -------------------------------------------------------------
 
 from systemds.operator.operation_node import OperationNode
-from systemds.operator.nodes.multi_return import MultiReturn
 from systemds.operator.nodes.scalar import Scalar
 from systemds.operator.nodes.matrix import Matrix
+from systemds.operator.nodes.multi_return import MultiReturn
 from systemds.operator.nodes.frame import Frame
+from systemds.operator.nodes.combine import Combine
 from systemds.operator.nodes.list_access import ListAccess
 from systemds.operator.nodes.list import List
 from systemds.operator.nodes.source import Source
 from systemds.operator import algorithm
 
-__all__ = ["OperationNode", "algorithm", "Scalar", "List", "ListAccess", 
"Matrix", "Frame", "Source", "MultiReturn"]
+__all__ = ["OperationNode", "algorithm", "Scalar", "List",
+           "ListAccess", "Matrix", "Frame", "Source", "MultiReturn", "Combine"]
diff --git a/src/main/python/systemds/operator/nodes/combine.py 
b/src/main/python/systemds/operator/nodes/combine.py
new file mode 100644
index 0000000000..9f7e9f0439
--- /dev/null
+++ b/src/main/python/systemds/operator/nodes/combine.py
@@ -0,0 +1,52 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+
+__all__ = ["Combine"]
+
+from typing import Dict, Iterable, List, Sequence
+
+from systemds.operator import OperationNode
+from systemds.script_building.dag import OutputType
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+class Combine(OperationNode):
+
+    def __init__(self, sds_context, func='',
+                 unnamed_input_nodes: Iterable[OperationNode] = None):
+        for a in unnamed_input_nodes:
+            if(a.output_type != OutputType.NONE):
+                raise ValueError(
+                    "Cannot combine elements that have outputs, all elements 
must be instances of print or write")
+
+        self._outputs = {}
+        super().__init__(sds_context, func, unnamed_input_nodes, None, 
OutputType.NONE, False)
+
+    def code_line(self, var_name: str, unnamed_input_vars: Sequence[str],
+                  named_input_vars: Dict[str, str]) -> str:
+        return ''
+
+    def compute(self, verbose: bool = False, lineage: bool = False):
+        return super().compute(verbose, lineage)
+
+    def __str__(self):
+        return "Combine"

[systemds] 01/02: [SYSTEMDS-3464] Python Combine Write

Reply via email to