This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemds.git
commit ef0c5e8e69f99f9b9eb68ace4c27e05624db25ec Author: baunsgaard <[email protected]> AuthorDate: Mon Jun 7 15:44:54 2021 +0200 [DOCS] Python API Docs update This commit update the docs and adds a few utilities to the python API - Add constructors called array and dict for making lists in python. - Generator generate __all__ as strings, with newlines - Add auto-gen-docs for frame, list, scalar and source. --- .../docs/source/api/context/systemds_context.rst | 12 ++- .../python/docs/source/api/operator/algorithms.rst | 4 +- .../api/operator/node/{matrix.rst => frame.rst} | 16 ++-- .../api/operator/node/{matrix.rst => list.rst} | 21 ++--- .../docs/source/api/operator/node/matrix.rst | 2 - .../api/operator/node/{matrix.rst => scalar.rst} | 14 ++-- .../api/operator/node/{matrix.rst => source.rst} | 18 ++--- .../python/docs/source/getting_started/install.rst | 2 +- .../source/getting_started/simple_examples.rst | 8 +- .../python/docs/source/guide/algorithms_basics.rst | 53 +++++++++---- src/main/python/docs/source/guide/federated.rst | 24 ++++-- src/main/python/docs/source/index.rst | 6 +- src/main/python/generator/generator.py | 2 +- src/main/python/systemds/__init__.py | 2 +- src/main/python/systemds/context/__init__.py | 2 +- .../python/systemds/context/systemds_context.py | 52 +++++++++--- src/main/python/systemds/operator/__init__.py | 2 +- .../python/systemds/operator/algorithm/__init__.py | 92 +++++++++++++++++++++- src/main/python/tests/list/test_list.py | 4 +- 19 files changed, 252 insertions(+), 84 deletions(-) diff --git a/src/main/python/docs/source/api/context/systemds_context.rst b/src/main/python/docs/source/api/context/systemds_context.rst index 531260c..664c654 100644 --- a/src/main/python/docs/source/api/context/systemds_context.rst +++ b/src/main/python/docs/source/api/context/systemds_context.rst @@ -24,17 +24,23 @@ SystemDSContext All operations using SystemDS need a java instance running. The connection is ensured by an ``SystemDSContext`` object. -An ``SystemDSContext`` object can be created using:: +An ``SystemDSContext`` object can be created using + +.. code-block:: python from systemds.context import SystemDSContext sds = SystemDSContext() -When the calculations are finished the context has to be closed again:: +When the calculations are finished the context has to be closed again + +.. code-block:: python sds.close() Since it is annoying that it is always necessary to close the context, ``SystemDSContext`` -implements the python context management protocol, which supports the following syntax:: +implements the python context management protocol, which supports the following syntax + +.. code-block:: python with SystemDSContext() as sds: # do something with sds which is an SystemDSContext diff --git a/src/main/python/docs/source/api/operator/algorithms.rst b/src/main/python/docs/source/api/operator/algorithms.rst index 92bb46e..5055c55 100644 --- a/src/main/python/docs/source/api/operator/algorithms.rst +++ b/src/main/python/docs/source/api/operator/algorithms.rst @@ -45,7 +45,9 @@ As an example the lm algorithm can be used as follows: weights = lm(sds.from_numpy(features), sds.from_numpy(y)).compute() print(weights) -The output should be similar to:: +The output should be similar to + +.. code-block:: python [[-0.11538199] [-0.20386541] diff --git a/src/main/python/docs/source/api/operator/node/matrix.rst b/src/main/python/docs/source/api/operator/node/frame.rst similarity index 77% copy from src/main/python/docs/source/api/operator/node/matrix.rst copy to src/main/python/docs/source/api/operator/node/frame.rst index 06e45ab..d9c039a 100644 --- a/src/main/python/docs/source/api/operator/node/matrix.rst +++ b/src/main/python/docs/source/api/operator/node/frame.rst @@ -19,19 +19,15 @@ .. .. ------------------------------------------------------------- -Matrix -====== +Frame +===== -A ``Matrix`` is represented either by an ``OperationNode``, or the derived class ``Matrix``. -An Matrix can be recognized it by checking the ``output_type`` of the object. - -Matrices are the most fundamental objects SystemDS operates on. +A ``Frame`` is represented either by an ``OperationNode``, or the derived class ``Frame``. Although it is possible to generate matrices with the function calls or object construction specified below, -the recommended way is to use the methods defined on ``SystemDSContext``. +the recommended way is to use the methods defined on ``SystemDSContext``, to read in a frame from disk. -.. autoclass:: systemds.operator.Matrix +.. autoclass:: systemds.operator.Frame :members: - .. automethod:: __init__ - + .. automethod:: __init__ \ No newline at end of file diff --git a/src/main/python/docs/source/api/operator/node/matrix.rst b/src/main/python/docs/source/api/operator/node/list.rst similarity index 62% copy from src/main/python/docs/source/api/operator/node/matrix.rst copy to src/main/python/docs/source/api/operator/node/list.rst index 06e45ab..fa34763 100644 --- a/src/main/python/docs/source/api/operator/node/matrix.rst +++ b/src/main/python/docs/source/api/operator/node/list.rst @@ -19,19 +19,20 @@ .. .. ------------------------------------------------------------- -Matrix -====== +List +==== -A ``Matrix`` is represented either by an ``OperationNode``, or the derived class ``Matrix``. -An Matrix can be recognized it by checking the ``output_type`` of the object. +A ``List`` is represented either by an ``OperationNode``, or the derived class ``List``. -Matrices are the most fundamental objects SystemDS operates on. +List can contain any of the other types: frame, matrix, scalar and itself list. +The list can be handled like a dictionary or a list primitive, since both access patters are the same at +dml script level. -Although it is possible to generate matrices with the function calls or object construction specified below, -the recommended way is to use the methods defined on ``SystemDSContext``. +Although it is possible to generate lists with the function calls or object construction specified below, +the recommended way is to use the methods defined on ``SystemDSContext``, to read in a list from disk, +or construct one using either constructors `array`, `dict` or `list` provided in ``SystemDSContext``. -.. autoclass:: systemds.operator.Matrix +.. autoclass:: systemds.operator.List :members: - .. automethod:: __init__ - + .. automethod:: __init__ \ No newline at end of file diff --git a/src/main/python/docs/source/api/operator/node/matrix.rst b/src/main/python/docs/source/api/operator/node/matrix.rst index 06e45ab..56eb02d 100644 --- a/src/main/python/docs/source/api/operator/node/matrix.rst +++ b/src/main/python/docs/source/api/operator/node/matrix.rst @@ -23,8 +23,6 @@ Matrix ====== A ``Matrix`` is represented either by an ``OperationNode``, or the derived class ``Matrix``. -An Matrix can be recognized it by checking the ``output_type`` of the object. - Matrices are the most fundamental objects SystemDS operates on. Although it is possible to generate matrices with the function calls or object construction specified below, diff --git a/src/main/python/docs/source/api/operator/node/matrix.rst b/src/main/python/docs/source/api/operator/node/scalar.rst similarity index 71% copy from src/main/python/docs/source/api/operator/node/matrix.rst copy to src/main/python/docs/source/api/operator/node/scalar.rst index 06e45ab..3a1ad25 100644 --- a/src/main/python/docs/source/api/operator/node/matrix.rst +++ b/src/main/python/docs/source/api/operator/node/scalar.rst @@ -19,19 +19,17 @@ .. .. ------------------------------------------------------------- -Matrix +Scalar ====== -A ``Matrix`` is represented either by an ``OperationNode``, or the derived class ``Matrix``. -An Matrix can be recognized it by checking the ``output_type`` of the object. +A ``Scalar`` is represented either by an ``OperationNode``, or the derived class ``Scalar``. -Matrices are the most fundamental objects SystemDS operates on. +Scalar can contain strings, ints, floats. -Although it is possible to generate matrices with the function calls or object construction specified below, +Although it is possible to generate Scalars with the function calls or object construction specified below, the recommended way is to use the methods defined on ``SystemDSContext``. -.. autoclass:: systemds.operator.Matrix +.. autoclass:: systemds.operator.Scalar :members: - .. automethod:: __init__ - + .. automethod:: __init__ \ No newline at end of file diff --git a/src/main/python/docs/source/api/operator/node/matrix.rst b/src/main/python/docs/source/api/operator/node/source.rst similarity index 66% copy from src/main/python/docs/source/api/operator/node/matrix.rst copy to src/main/python/docs/source/api/operator/node/source.rst index 06e45ab..d706c2b 100644 --- a/src/main/python/docs/source/api/operator/node/matrix.rst +++ b/src/main/python/docs/source/api/operator/node/source.rst @@ -19,19 +19,17 @@ .. .. ------------------------------------------------------------- -Matrix +Source ====== -A ``Matrix`` is represented either by an ``OperationNode``, or the derived class ``Matrix``. -An Matrix can be recognized it by checking the ``output_type`` of the object. +A ``Source`` is the action of importing method declarations from other DML scripts. +This function allows one to define a function in DML and use it in the python API. -Matrices are the most fundamental objects SystemDS operates on. +Although it is possible to generate sources with the function calls or object construction specified below, +the recommended way is to use the method defined on ``SystemDSContext`` called source to construct one +using a path to the dml file to source. -Although it is possible to generate matrices with the function calls or object construction specified below, -the recommended way is to use the methods defined on ``SystemDSContext``. - -.. autoclass:: systemds.operator.Matrix +.. autoclass:: systemds.operator.Source :members: - .. automethod:: __init__ - + .. automethod:: __init__ \ No newline at end of file diff --git a/src/main/python/docs/source/getting_started/install.rst b/src/main/python/docs/source/getting_started/install.rst index 8505185..ddd9c8d 100644 --- a/src/main/python/docs/source/getting_started/install.rst +++ b/src/main/python/docs/source/getting_started/install.rst @@ -70,7 +70,7 @@ Then to build the system you do the following - Open an terminal at the root of the repository. - Package the Java code using the ``mvn clean package -P distribution`` command - ``cd src/main/python`` to point at the root of the SystemDS Python library. -- Copy `jars` with ``python pre_setup.py`` +- Build the Python API ``python create_python_dist.py`` - Install with ``pip install .`` After this you are ready to go. diff --git a/src/main/python/docs/source/getting_started/simple_examples.rst b/src/main/python/docs/source/getting_started/simple_examples.rst index 939a9fe..8ca44a2 100644 --- a/src/main/python/docs/source/getting_started/simple_examples.rst +++ b/src/main/python/docs/source/getting_started/simple_examples.rst @@ -46,7 +46,9 @@ Making use of SystemDS, let us multiply an Matrix with an scalar: print(m_res.compute()) # context will automatically be closed and process stopped -As output we get:: +As output we get + +.. code-block:: python [[ 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02] [ 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02 13.02] @@ -111,7 +113,9 @@ One example of this is l2SVM, a high level functions for Data-Scientists. Let's model = l2svm(sds.from_numpy(features), sds.from_numpy(labels)).compute() print(model) -The output should be similar to:: +The output should be similar to + +.. code-block:: python [[ 0.02033445] [-0.00324092] diff --git a/src/main/python/docs/source/guide/algorithms_basics.rst b/src/main/python/docs/source/guide/algorithms_basics.rst index b5f6a64..f16ddb4 100644 --- a/src/main/python/docs/source/guide/algorithms_basics.rst +++ b/src/main/python/docs/source/guide/algorithms_basics.rst @@ -36,7 +36,9 @@ Step 1: Get Dataset ------------------- SystemDS provides builtin for downloading and setup of the MNIST dataset. -To setup this simply use:: +To setup this simply use + +.. code-block:: python from systemds.examples.tutorials.mnist import DataManager d = DataManager() @@ -60,7 +62,9 @@ The input X is the training data. It require the data to have two dimensions, the first resemble the number of inputs, and the other the number of features. -Therefore to make the data fit the algorithm we reshape the X dataset, like so:: +Therefore to make the data fit the algorithm we reshape the X dataset, like so + +.. code-block:: python X = X.reshape((60000, 28*28)) @@ -70,7 +74,9 @@ The Y dataset also does not perfectly fit the logistic regression algorithm, thi for this dataset is values ranging from 0, to 9, each label correspond to the integer shown in the image. unfortunately the algorithm require the labels to be distinct integers from 1 and upwards. -Therefore we add 1 to each label such that the labels go from 1 to 10, like this:: +Therefore we add 1 to each label such that the labels go from 1 to 10, like this + +.. code-block:: python Y = Y + 1 @@ -79,31 +85,40 @@ With these steps we are now ready to train a simple model. Step 3: Training ---------------- -To start with, we setup a SystemDS context:: +To start with, we setup a SystemDS context + +.. code-block:: python from systemds.context import SystemDSContext sds = SystemDSContext() -Then setup the data:: +Then setup the data + +.. code-block:: python from systemds.operator import Matrix X_ds = sds.from_numpy(X) Y_ds = sds.from_numpy( Y) to reduce the training time and verify everything works, it is usually good to reduce the amount of data, -to train on a smaller sample to start with:: +to train on a smaller sample to start with + +.. code-block:: python sample_size = 1000 X_ds = sds.from_numpy(X[:sample_size]) Y_ds = sds.from_numpy(Y[:sample_size]) -And now everything is ready for our algorithm:: +And now everything is ready for our algorithm - from systemds.operator.algorithm import multiLogReg +.. code-block:: python + from systemds.operator.algorithm import multiLogReg bias = multiLogReg(X_ds, Y_ds) -Note that nothing has been calculated yet, in SystemDS, since it only happens when you call compute:: +Note that nothing has been calculated yet, in SystemDS, since it only happens when you call compute + +.. code-block:: python bias_r = bias.compute() @@ -115,14 +130,18 @@ Step 3: Validate To see what accuracy the model achieves, we have to load in the test dataset as well. -this can also be extracted from our builtin MNIST loader, to keep the tutorial short the operations are combined:: +this can also be extracted from our builtin MNIST loader, to keep the tutorial short the operations are combined + +.. code-block:: python Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28*28))) Yt = sds.from_numpy(d.get_test_labels()) + 1 The above loads the test data, and reshapes the X data the same way the training data was reshaped. -Finally we verify the accuracy by calling:: +Finally we verify the accuracy by calling + +.. code-block:: python from systemds.operator.algorithm import multiLogRegPredict [m, y_pred, acc] = multiLogRegPredict(Xt, bias, Yt).compute() @@ -145,7 +164,9 @@ Now that we have a working baseline we can start tuning parameters. But first it is valuable to know how much of a difference in performance there is on the training data, vs the test data. This gives an indication of if we have exhausted the learning potential of the training data. -To see how our accuracy is on the training data we use the Predict function again, but with our training data:: +To see how our accuracy is on the training data we use the Predict function again, but with our training data + +.. code-block:: python [m, y_pred, acc] = multiLogRegPredict(X_ds, bias, Y_ds).compute() print(acc) @@ -155,7 +176,9 @@ and have nothing more to learn from the data as it is now. To improve further we have to increase the training data, here for example we increase it from our sample of 1k to the full training dataset of 60k, in this example the maxi is set to reduce the number of iterations the algorithm takes, -to again reduce training time:: +to again reduce training time + +.. code-block:: python X_ds = sds.from_numpy(X) Y_ds = sds.from_numpy(Y) @@ -175,7 +198,9 @@ Full Script The full script, some steps are combined to reduce the overall script. One noteworthy change is the + 1 is done on the matrix ready for SystemDS, -this makes SystemDS responsible for adding the 1 to each value.:: +this makes SystemDS responsible for adding the 1 to each value. + +.. code-block:: python from systemds.context import SystemDSContext from systemds.operator.algorithm import multiLogReg, multiLogRegPredict diff --git a/src/main/python/docs/source/guide/federated.rst b/src/main/python/docs/source/guide/federated.rst index 6b4c2ce..e903a55 100644 --- a/src/main/python/docs/source/guide/federated.rst +++ b/src/main/python/docs/source/guide/federated.rst @@ -35,7 +35,9 @@ A simple guide to do this is in the SystemDS Repository_. .. _Repository: https://github.com/apache/systemds/tree/master/bin/ If that is setup correctly simply start a worker using the following command. -Here the ``8001`` refer to the port used by the worker.:: +Here the ``8001`` refer to the port used by the worker. + +.. code-block:: python systemds WORKER 8001 @@ -45,7 +47,9 @@ Simple Aggregation Example In this example we use a single federated worker, and aggregate the sum of its data. First we need to create some data for our federated worker to use. -In this example we simply use Numpy to create a ``test.csv`` file:: +In this example we simply use Numpy to create a ``test.csv`` file + +.. code-block:: python # Import numpy import numpy as np @@ -59,7 +63,9 @@ To make this simply execute the following:: echo '{ "format":"csv", "header":false, "rows":3, "cols":3 }' > temp/test.csv.mtd After creating our data we the federated worker becomes able to execute federated instructions. -The aggregated sum using federated instructions in python SystemDS is done as follows:: +The aggregated sum using federated instructions in python SystemDS is done as follows + +.. code-block:: python # Import numpy and SystemDS federated import numpy as np @@ -89,13 +95,17 @@ In this example we multiply matrices that are located in different federated env Using the data created from the last example we can simulate multiple federated workers by starting multiple ones on different ports. -Start with 3 different terminals, and run one federated environment in each.:: +Start with 3 different terminals, and run one federated environment in each. + +.. code-block:: python systemds WORKER 8001 systemds WORKER 8002 systemds WORKER 8003 -Once all three workers are up and running we can leverage all three in the following example:: +Once all three workers are up and running we can leverage all three in the following example + +.. code-block:: python # Import numpy and SystemDS federated import numpy as np @@ -122,7 +132,9 @@ Once all three workers are up and running we can leverage all three in the follo print(res) -The print should look like:: +The print should look like + +.. code-block:: python [[ 1. 4. 9. 1. 4. 9.] [16. 25. 36. 16. 25. 36.] diff --git a/src/main/python/docs/source/index.rst b/src/main/python/docs/source/index.rst index f05d607..bf4e154 100644 --- a/src/main/python/docs/source/index.rst +++ b/src/main/python/docs/source/index.rst @@ -62,13 +62,17 @@ tensors (multi-dimensional arrays) whose first dimension may have a heterogeneou api/context/systemds_context.rst api/operator/algorithms.rst api/operator/node/matrix.rst - api/operator/operation_node.rst + api/operator/node/frame.rst + api/operator/node/list.rst + api/operator/node/scalar.rst + api/operator/node/source.rst .. toctree:: :maxdepth: 1 :hidden: :caption: Internals API + api/operator/operation_node.rst api/script_building/dag.rst api/script_building/script.rst api/utils/converters.rst diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py index 39dc59e..f9a7a19 100644 --- a/src/main/python/generator/generator.py +++ b/src/main/python/generator/generator.py @@ -89,7 +89,7 @@ class PythonAPIFileGenerator(object): init_file.write(self.init_import.format(function=f)) init_file.write("\n") init_file.write(self.init_all.format( - functions=self.function_names).replace("'", "")) + functions=self.function_names).replace(",",",\n")) class PythonAPIFunctionGenerator(object): diff --git a/src/main/python/systemds/__init__.py b/src/main/python/systemds/__init__.py index ab3b987..27d7425 100644 --- a/src/main/python/systemds/__init__.py +++ b/src/main/python/systemds/__init__.py @@ -23,4 +23,4 @@ from systemds import context from systemds import operator from systemds import examples -__all__ = [context, operator, examples] +__all__ = ["context", "operator", "examples"] diff --git a/src/main/python/systemds/context/__init__.py b/src/main/python/systemds/context/__init__.py index dd8417b..02f4a3d 100644 --- a/src/main/python/systemds/context/__init__.py +++ b/src/main/python/systemds/context/__init__.py @@ -21,4 +21,4 @@ from systemds.context.systemds_context import SystemDSContext -__all__ = [SystemDSContext] +__all__ = ["SystemDSContext"] diff --git a/src/main/python/systemds/context/systemds_context.py b/src/main/python/systemds/context/systemds_context.py index 8a2ebd6..eb65887 100644 --- a/src/main/python/systemds/context/systemds_context.py +++ b/src/main/python/systemds/context/systemds_context.py @@ -38,7 +38,8 @@ import numpy as np import pandas as pd from py4j.java_gateway import GatewayParameters, JavaGateway from py4j.protocol import Py4JNetworkError -from systemds.operator import Frame, Matrix, OperationNode, Scalar, Source, List +from systemds.operator import (Frame, List, Matrix, OperationNode, Scalar, + Source) from systemds.script_building import OutputType from systemds.utils.consts import VALID_INPUT_TYPES from systemds.utils.helpers import get_module_dir @@ -46,7 +47,11 @@ from systemds.utils.helpers import get_module_dir class SystemDSContext(object): """A context with a connection to a java instance with which SystemDS operations are executed. - The java process is started and is running using a random tcp port for instruction parsing.""" + The java process is started and is running using a random tcp port for instruction parsing. + + This class is used as the starting point for all SystemDS execution. It gives the ability to create + all the different objects and adding them to the exectution. + """ java_gateway: JavaGateway @@ -324,9 +329,9 @@ class SystemDSContext(object): def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode: """ Read an file from disk. Supportted types include: - CSV, Matrix Market(coordinate), Text(i,j,v), SystemDS Binay + CSV, Matrix Market(coordinate), Text(i,j,v), SystemDS Binary, etc. See: http://apache.github.io/systemds/site/dml-language-reference#readwrite-built-in-functions for more details - :return: an Operation Node, containing the read data. + :return: an Operation Node, containing the read data the operationNode read can be of types, Matrix, Frame or Scalar. """ mdt_filepath = path + ".mtd" if os.path.exists(mdt_filepath): @@ -353,9 +358,9 @@ class SystemDSContext(object): print("WARNING: Unknown type read please add a mtd file, or specify in arguments") return OperationNode(self, "read", [f'"{path}"'], named_input_nodes=kwargs) - def scalar(self, v: Dict[str, VALID_INPUT_TYPES]) -> 'Scalar': + def scalar(self, v: Dict[str, VALID_INPUT_TYPES]) -> Scalar: """ Construct an scalar value, this can contain str, float, double, integers and booleans. - :return: An `OperationNode` containing the scalar value. + :return: A scalar containing the given value. """ if type(v) is str: if not ((v[0] == '"' and v[-1] == '"') or (v[0] == "'" and v[-1] == "'")): @@ -374,6 +379,7 @@ class SystemDSContext(object): :param mat: the numpy array :param args: unnamed parameters :param kwargs: named parameters + :return: A Matrix """ unnamed_params = ['\'./tmp/{file_name}\''] @@ -398,6 +404,7 @@ class SystemDSContext(object): :param df: the pandas dataframe :param args: unnamed parameters :param kwargs: named parameters + :return: A Frame """ unnamed_params = ["'./tmp/{file_name}'"] @@ -427,7 +434,7 @@ class SystemDSContext(object): :param ranges: for each federated worker a pair of begin and end index of their held matrix :param args: unnamed params :param kwargs: named params - :return: the OperationNode representing this operation + :return: The Matrix containing the Federated data. """ addresses_str = 'list(' + \ ','.join(map(lambda s: f'"{s}"', addresses)) + ')' @@ -440,7 +447,7 @@ class SystemDSContext(object): named_params.update(kwargs) return Matrix(self, 'federated', args, named_params) - def source(self, path: str, name: str, print_imported_methods: bool = False): + def source(self, path: str, name: str, print_imported_methods: bool = False) -> Source: """Import methods from a given dml file. The importing is done thorugh the DML command source, and adds all defined methods from @@ -459,5 +466,32 @@ class SystemDSContext(object): """ return Source(self, path, name, print_imported_methods) - def list(self, *args: Sequence[VALID_INPUT_TYPES], **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'List': + def list(self, *args: Sequence[VALID_INPUT_TYPES], **kwargs: Dict[str, VALID_INPUT_TYPES]) -> List: + """ Create a List object containing the given nodes. + + Note that only a sequence is allowed, or a dictionary, not both at the same time. + :param args: A Sequence that will be inserted to a list + :param kwargs: A Dictionary that will return a dictionary, (internally handled as a list) + :return: A List + """ return List(self, unnamed_input_nodes=args, named_input_nodes=kwargs) + + def array(self, *args: Sequence[VALID_INPUT_TYPES]) -> List: + """ Create a List object containing the given nodes. + + Note that only a sequence is allowed, or a dictionary, not both at the same time. + :param args: A Sequence that will be inserted to a list + :param kwargs: A Dictionary that will return a dictionary, (internally handled as a list) + :return: A List + """ + return List(self, unnamed_input_nodes=args) + + def dict(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> List: + """ Create a List object containing the given nodes. + + Note that only a sequence is allowed, or a dictionary, not both at the same time. + :param args: A Sequence that will be inserted to a list + :param kwargs: A Dictionary that will return a dictionary, (internally handled as a list) + :return: A List + """ + return List(self, named_input_nodes=kwargs) diff --git a/src/main/python/systemds/operator/__init__.py b/src/main/python/systemds/operator/__init__.py index ebdb6ee..c157744 100644 --- a/src/main/python/systemds/operator/__init__.py +++ b/src/main/python/systemds/operator/__init__.py @@ -29,4 +29,4 @@ from systemds.operator.nodes.list import List from systemds.operator.nodes.source import Source from systemds.operator import algorithm -__all__ = [OperationNode, algorithm, Scalar, List, ListAccess, Matrix, Frame, Source, MultiReturn] +__all__ = ["OperationNode", "algorithm", "Scalar", "List", "ListAccess", "Matrix", "Frame", "Source", "MultiReturn"] diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py index d2e5d83..9658832 100644 --- a/src/main/python/systemds/operator/algorithm/__init__.py +++ b/src/main/python/systemds/operator/algorithm/__init__.py @@ -113,4 +113,94 @@ from .builtin.winsorize import winsorize from .builtin.xdummy1 import xdummy1 from .builtin.xdummy2 import xdummy2 -__all__ = [abstain, als, alsCG, alsDS, alsPredict, alsTopkPredict, arima, bandit, bivar, components, confusionMatrix, cor, cox, cspline, csplineDS, cvlm, dbscan, decisionTree, discoverFD, dist, executePipeline, gaussianClassifier, getAccuracy, glm, gmm, gmmPredict, gnmf, hyperband, img_brightness, img_crop, img_cutout, img_invert, img_mirror, img_posterize, img_rotate, img_sample_pairing, img_shear, img_transform, img_translate, imputeByFD, imputeByMean, imputeByMedian, imputeByMode, int [...] +__all__ = ['abstain', + 'als', + 'alsCG', + 'alsDS', + 'alsPredict', + 'alsTopkPredict', + 'arima', + 'bandit', + 'bivar', + 'components', + 'confusionMatrix', + 'cor', + 'cox', + 'cspline', + 'csplineDS', + 'cvlm', + 'dbscan', + 'decisionTree', + 'discoverFD', + 'dist', + 'executePipeline', + 'gaussianClassifier', + 'getAccuracy', + 'glm', + 'gmm', + 'gmmPredict', + 'gnmf', + 'hyperband', + 'img_brightness', + 'img_crop', + 'img_cutout', + 'img_invert', + 'img_mirror', + 'img_posterize', + 'img_rotate', + 'img_sample_pairing', + 'img_shear', + 'img_transform', + 'img_translate', + 'imputeByFD', + 'imputeByMean', + 'imputeByMedian', + 'imputeByMode', + 'intersect', + 'km', + 'kmeans', + 'kmeansPredict', + 'knnbf', + 'l2svm', + 'l2svmPredict', + 'lasso', + 'lm', + 'lmCG', + 'lmDS', + 'lmPredict', + 'logSumExp', + 'msvm', + 'msvmPredict', + 'multiLogReg', + 'multiLogRegPredict', + 'na_locf', + 'naiveBayes', + 'naiveBayesPredict', + 'normalize', + 'outlier', + 'outlierByArima', + 'outlierByIQR', + 'outlierBySd', + 'pca', + 'pnmf', + 'ppca', + 'randomForest', + 'scale', + 'scaleApply', + 'sherlock', + 'sherlockPredict', + 'sigmoid', + 'slicefinder', + 'smote', + 'split', + 'splitBalanced', + 'stableMarriage', + 'statsNA', + 'steplm', + 'toOneHot', + 'tomeklink', + 'univar', + 'vectorToCsv', + 'winsorize', + 'xdummy1', + 'xdummy2'] diff --git a/src/main/python/tests/list/test_list.py b/src/main/python/tests/list/test_list.py index ada06c4..44cbf29 100644 --- a/src/main/python/tests/list/test_list.py +++ b/src/main/python/tests/list/test_list.py @@ -49,7 +49,7 @@ class TestListOperations(unittest.TestCase): m1p = self.sds.from_numpy(m1) m2 = np.array([4., 5., 6.]) m2p = self.sds.from_numpy(m2) - list_obj = self.sds.list(m1p, m2p) + list_obj = self.sds.array(m1p, m2p) tmp = list_obj[0] + list_obj[1] res = tmp.compute().flatten() self.assertTrue(np.allclose(m1 + m2, res)) @@ -62,7 +62,7 @@ class TestListOperations(unittest.TestCase): m1p = self.sds.from_numpy(m1) m2 = np.array([4., 5., 6.]) m2p = self.sds.from_numpy(m2) - list_obj = self.sds.list(m1p, m2p) + list_obj = self.sds.array(m1p, m2p) tmp = list_obj[0] + 2 res = tmp.compute().flatten() self.assertTrue(np.allclose(m1 + 2, res))
