This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 15e278a257ead5cf42891d2970664ba5f0682e80 Author: baunsgaard <[email protected]> AuthorDate: Tue Nov 15 15:19:06 2022 +0100 [SYSTEMDS-3464] Python Combine Write This commit finally adds a long-wanted feature to python scripts. This allows us to call multiple end nodes in a script without having to do multiple executions. Combine(Write(X,"Path1"), Write(Y,"Path2")).compute() Similarly we can do: Combine(Print(X), Write(X,"Path1")).compute() Closes #1729 --- .../docs/source/code/guide/end_to_end/part2.py | 26 ++++++++--- .../docs/source/guide/python_end_to_end_tut.rst | 34 +++++++++----- .../python/systemds/context/systemds_context.py | 15 ++++++- src/main/python/systemds/operator/__init__.py | 6 ++- src/main/python/systemds/operator/nodes/combine.py | 52 ++++++++++++++++++++++ 5 files changed, 114 insertions(+), 19 deletions(-) diff --git a/src/main/python/docs/source/code/guide/end_to_end/part2.py b/src/main/python/docs/source/code/guide/end_to_end/part2.py index 73c7a6539f..a408e2796a 100644 --- a/src/main/python/docs/source/code/guide/end_to_end/part2.py +++ b/src/main/python/docs/source/code/guide/end_to_end/part2.py @@ -40,15 +40,11 @@ with SystemDSContext() as sds: # Transform frames to matrices. X, M1 = X_frame.transform_encode(spec=jspec_data) - Xt = Xt_frame.transform_apply(spec=jspec_data, meta=M1) Y, M2 = Y_frame.transform_encode(spec=jspec_labels) - Yt = Yt_frame.transform_apply(spec=jspec_labels, meta=M2) # Subsample to make training faster X = X[0:train_count] Y = Y[0:train_count] - Xt = Xt[0:test_count] - Yt = Yt[0:test_count] # Load custom neural network neural_net_src_path = "tests/examples/tutorials/neural_net_source.dml" @@ -60,6 +56,26 @@ with SystemDSContext() as sds: seed = 42 network = FFN_package.train(X, Y, epochs, batch_size, learning_rate, seed) + + # Write metadata and trained network to disk. 
+ sds.combine( + network.write('tests/examples/docs_test/end_to_end/network'), + M1.write('tests/examples/docs_test/end_to_end/encode_X'), + M2.write('tests/examples/docs_test/end_to_end/encode_Y') + ).compute() - network.write('tests/examples/docs_test/end_to_end/').compute() + # Read metadata and trained network and do prediction. + M1_r = sds.read('tests/examples/docs_test/end_to_end/encode_X') + M2_r = sds.read('tests/examples/docs_test/end_to_end/encode_Y') + network_r = sds.read('tests/examples/docs_test/end_to_end/network') + Xt = Xt_frame.transform_apply(spec=jspec_data, meta=M1_r) + Yt = Yt_frame.transform_apply(spec=jspec_labels, meta=M2_r) + Xt = Xt[0:test_count] + Yt = Yt[0:test_count] + FFN_package_2 = sds.source(neural_net_src_path, "fnn") + probs = FFN_package_2.predict(Xt, network_r) + accuracy = FFN_package_2.eval(probs, Yt).compute() + import logging + logging.info("accuracy: " + str(accuracy)) + diff --git a/src/main/python/docs/source/guide/python_end_to_end_tut.rst b/src/main/python/docs/source/guide/python_end_to_end_tut.rst index 2a8cb9fb76..961b47d61b 100644 --- a/src/main/python/docs/source/guide/python_end_to_end_tut.rst +++ b/src/main/python/docs/source/guide/python_end_to_end_tut.rst @@ -118,12 +118,13 @@ For this we will introduce another dml file, which can be used to train a basic Step 1: Obtain data ~~~~~~~~~~~~~~~~~~~ -For the whole data setup please refer to level 1, Step 1, as these steps are identical. +For the whole data setup please refer to level 1, Step 1, as these steps are almost identical, +but instead of preparing the test data, we only prepare the training data. .. include:: ../code/guide/end_to_end/part2.py :code: python :start-line: 20 - :end-line: 51 + :end-line: 47 Step 2: Load the algorithm ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -134,12 +135,10 @@ This file includes all the necessary functions for training, evaluating, and sto The returned object of the source call is further used for calling the functions. 
The file can be found here: - - :doc:tests/examples/tutorials/neural_net_source.dml - .. include:: ../code/guide/end_to_end/part2.py :code: python - :start-line: 54 - :end-line: 55 + :start-line: 48 + :end-line: 51 Step 3: Training the neural network @@ -153,8 +152,8 @@ The seed argument ensures that running the code again yields the same results. .. include:: ../code/guide/end_to_end/part2.py :code: python - :start-line: 61 - :end-line: 62 + :start-line: 52 + :end-line: 58 Step 4: Saving the model @@ -163,15 +162,28 @@ Step 4: Saving the model For later usage, we can save the trained model. We only need to specify the name of our model and the file path. This call stores the weights and biases of our model. +Similarly, the transformation metadata needed to transform input data for the model +is saved. .. include:: ../code/guide/end_to_end/part2.py :code: python - :start-line: 64 + :start-line: 59 :end-line: 65 +Step 5: Predict on Unseen data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once the model is saved along with metadata, it is simple to apply it all to +unseen data: + +.. include:: ../code/guide/end_to_end/part2.py + :code: python + :start-line: 66 + :end-line: 77 + Full Script NN -~~~~~~~~~~~--- +~~~~~~~~~~~~~~ The complete script now can be seen here: @@ -179,4 +191,4 @@ The complete script now can be seen here: .. 
include:: ../code/guide/end_to_end/part2.py :code: python :start-line: 20 - :end-line: 64 + :end-line: 80 diff --git a/src/main/python/systemds/context/systemds_context.py b/src/main/python/systemds/context/systemds_context.py index 8543e83417..a4c89d557e 100644 --- a/src/main/python/systemds/context/systemds_context.py +++ b/src/main/python/systemds/context/systemds_context.py @@ -38,7 +38,7 @@ import numpy as np import pandas as pd from py4j.java_gateway import GatewayParameters, JavaGateway, Py4JNetworkError from systemds.operator import (Frame, List, Matrix, OperationNode, Scalar, - Source) + Source, Combine) from systemds.script_building import DMLScript, OutputType from systemds.utils.consts import VALID_INPUT_TYPES from systemds.utils.helpers import get_module_dir @@ -630,6 +630,19 @@ class SystemDSContext(object): """ return List(self, unnamed_input_nodes=args, named_input_nodes=kwargs) + def combine(self, *args: Sequence[VALID_INPUT_TYPES]) -> Combine: + """ combine nodes to call compute on multiple operations. + + This is useful for the case of having multiple writes in one script and wanting + to execute all in one execution reusing intermediates. + + Note this combine does not allow returning anything to the user, so if used, + please only use nodes that end with either writing or printing elements. + + :param args: A sequence that will be executed with call to compute() + """ + return Combine(self, unnamed_input_nodes=args) + def array(self, *args: Sequence[VALID_INPUT_TYPES]) -> List: """ Create a List object containing the given nodes. 
diff --git a/src/main/python/systemds/operator/__init__.py b/src/main/python/systemds/operator/__init__.py index c1577441c8..51a586bc63 100644 --- a/src/main/python/systemds/operator/__init__.py +++ b/src/main/python/systemds/operator/__init__.py @@ -20,13 +20,15 @@ # ------------------------------------------------------------- from systemds.operator.operation_node import OperationNode -from systemds.operator.nodes.multi_return import MultiReturn from systemds.operator.nodes.scalar import Scalar from systemds.operator.nodes.matrix import Matrix +from systemds.operator.nodes.multi_return import MultiReturn from systemds.operator.nodes.frame import Frame +from systemds.operator.nodes.combine import Combine from systemds.operator.nodes.list_access import ListAccess from systemds.operator.nodes.list import List from systemds.operator.nodes.source import Source from systemds.operator import algorithm -__all__ = ["OperationNode", "algorithm", "Scalar", "List", "ListAccess", "Matrix", "Frame", "Source", "MultiReturn"] +__all__ = ["OperationNode", "algorithm", "Scalar", "List", + "ListAccess", "Matrix", "Frame", "Source", "MultiReturn", "Combine"] diff --git a/src/main/python/systemds/operator/nodes/combine.py b/src/main/python/systemds/operator/nodes/combine.py new file mode 100644 index 0000000000..9f7e9f0439 --- /dev/null +++ b/src/main/python/systemds/operator/nodes/combine.py @@ -0,0 +1,52 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + + +__all__ = ["Combine"] + +from typing import Dict, Iterable, List, Sequence + +from systemds.operator import OperationNode +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +class Combine(OperationNode): + + def __init__(self, sds_context, func='', + unnamed_input_nodes: Iterable[OperationNode] = None): + for a in unnamed_input_nodes: + if(a.output_type != OutputType.NONE): + raise ValueError( + "Cannot combine elements that have outputs, all elements must be instances of print or write") + + self._outputs = {} + super().__init__(sds_context, func, unnamed_input_nodes, None, OutputType.NONE, False) + + def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str]) -> str: + return '' + + def compute(self, verbose: bool = False, lineage: bool = False): + return super().compute(verbose, lineage) + + def __str__(self): + return "Combine"
