This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new bc1e438 [SYSTEMDS-3195] Python federated tutorial test
bc1e438 is described below
commit bc1e4388cb0100a26d15b9d56a86dcba7b3d0454
Author: baunsgaard <[email protected]>
AuthorDate: Tue Nov 2 13:22:54 2021 +0100
[SYSTEMDS-3195] Python federated tutorial test
---
.../docs/source/code/federatedTutorial_part1.py | 29 +++++++++
.../docs/source/code/federatedTutorial_part2.py | 39 +++++++++++
.../docs/source/code/federatedTutorial_part3.py | 47 ++++++++++++++
.../source/code/federatedTutorial_part3_old.py | 44 +++++++++++++
.../source/code/federatedTutorial_part3_old2.py | 55 ++++++++++++++++
src/main/python/docs/source/guide/federated.rst | 75 +++++-----------------
src/main/python/tests/federated/runFedTest.sh | 6 ++
.../tests/federated/test_federated_tutorial.py | 39 +++++++++++
8 files changed, 274 insertions(+), 60 deletions(-)
diff --git a/src/main/python/docs/source/code/federatedTutorial_part1.py
b/src/main/python/docs/source/code/federatedTutorial_part1.py
new file mode 100644
index 0000000..7795c4b
--- /dev/null
+++ b/src/main/python/docs/source/code/federatedTutorial_part1.py
@@ -0,0 +1,29 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+# Python
+import numpy as np
+import os
+# Create the data directory; exist_ok makes this a no-op if it already exists.
+os.makedirs("temp", exist_ok=True)
+a = np.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+np.savetxt("temp/test.csv", a, delimiter=",")  # 3x3 matrix stored as csv
+with open("temp/test.csv.mtd", "w") as mtd:  # metadata file next to the data
+    mtd.write('{ "format":"csv", "header":false, "rows":3, "cols":3 }')
diff --git a/src/main/python/docs/source/code/federatedTutorial_part2.py
b/src/main/python/docs/source/code/federatedTutorial_part2.py
new file mode 100644
index 0000000..ac9c0bf
--- /dev/null
+++ b/src/main/python/docs/source/code/federatedTutorial_part2.py
@@ -0,0 +1,39 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+# Python
+import numpy as np
+from systemds.context import SystemDSContext
+
+# Create a federated matrix
+# Indicate the dimensions of the data:
+# Here the first list in the tuple is the top left coordinate,
+# and the second the bottom right coordinate.
+# It is ordered as [row,col].
+dims = ([0, 0], [3, 3])
+
+# Specify the address + file path from worker:
+address = "localhost:8001/temp/test.csv"
+
+with SystemDSContext() as sds:
+ fed_a = sds.federated([address], [dims])
+ # Sum the federated matrix and call compute to execute
+ print(fed_a.sum().compute())
+ # Result should be 45.
diff --git a/src/main/python/docs/source/code/federatedTutorial_part3.py
b/src/main/python/docs/source/code/federatedTutorial_part3.py
new file mode 100644
index 0000000..1d1125a
--- /dev/null
+++ b/src/main/python/docs/source/code/federatedTutorial_part3.py
@@ -0,0 +1,47 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+# Python
+import numpy as np
+from systemds.context import SystemDSContext
+
+addr1 = "localhost:8001/temp/test.csv"
+addr2 = "localhost:8002/temp/test.csv"
+addr3 = "localhost:8003/temp/test.csv"
+
+# Create a federated matrix using three federated environments
+# Note that the three federated partitions are stacked on top of each other
+
+with SystemDSContext() as sds:
+ # federated data on three locations
+ fed = sds.federated([addr1, addr2, addr3], [
+ ([0, 0], [3, 3]),
+ ([3, 0], [6, 3]),
+ ([6, 0], [9, 3])])
+ # local 3x9 matrix to multiply with
+ loc = sds.from_numpy(np.array([
+ [1,2,3,4,5,6,7,8,9],
+ [1,2,3,4,5,6,7,8,9],
+ [1,2,3,4,5,6,7,8,9]
+ ]))
+ # Matrix multiply local and federated
+ ret = loc @ fed
+ # execute the lazy script and print
+ print(ret.compute())
diff --git a/src/main/python/docs/source/code/federatedTutorial_part3_old.py
b/src/main/python/docs/source/code/federatedTutorial_part3_old.py
new file mode 100644
index 0000000..eaa3f1d
--- /dev/null
+++ b/src/main/python/docs/source/code/federatedTutorial_part3_old.py
@@ -0,0 +1,44 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+# Python
+import numpy as np
+from systemds.context import SystemDSContext
+
+addr1 = "localhost:8001/temp/test.csv"
+addr2 = "localhost:8002/temp/test.csv"
+addr3 = "localhost:8003/temp/test.csv"
+
+# Create two federated matrices, each using two federated environments
+# NOTE(review): the ranges appear to place each pair side by side (3x6) - confirm
+
+with SystemDSContext() as sds:
+ fed_a = sds.federated(
+ [addr1, addr2],
+ [([0, 0], [3, 3]), ([0, 3], [3, 6])])
+
+ fed_b = sds.federated(
+ [addr1, addr3],
+ [([0, 0], [3, 3]), ([0, 3], [3, 6])])
+
+ # Element-wise multiply, compute and print.
+ res = (fed_a * fed_b).compute()
+
+print(res)
diff --git a/src/main/python/docs/source/code/federatedTutorial_part3_old2.py
b/src/main/python/docs/source/code/federatedTutorial_part3_old2.py
new file mode 100644
index 0000000..a6bf94f
--- /dev/null
+++ b/src/main/python/docs/source/code/federatedTutorial_part3_old2.py
@@ -0,0 +1,55 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+# Python
+import numpy as np
+from systemds.context import SystemDSContext
+
+addr1 = "localhost:8001/temp/test.csv"
+addr2 = "localhost:8002/temp/test.csv"
+addr3 = "localhost:8003/temp/test.csv"
+
+# Create two federated matrices, one per federated environment, and compare
+# federated, local and mixed (federated/local) matrix multiplications
+
+with SystemDSContext() as sds:
+
+    fed_a = sds.federated([addr1],[([0, 0], [3, 3])])
+    fed_b = sds.federated([addr2],[([0, 0], [3, 3])])
+    # fed_c = sds.federated([addr3],[([0, 0], [3, 3])])
+
+    np_array = np.array([[1,2,3],[4,5,6],[7,8,9]])
+
+    loc_a = sds.from_numpy(np_array)
+    loc_b = sds.from_numpy(np_array)
+
+    fed_res = fed_a @ fed_b
+    loc_res = loc_a @ loc_b
+
+    hybrid_res_1 = fed_a @ loc_b
+    hybrid_res_2 = loc_a @ fed_b
+
+    # compute and print every operand and result, including both hybrid products
+    print(fed_a.compute())
+    print(fed_b.compute())
+    print(fed_res.compute(verbose=True))
+    print(loc_res.compute(verbose=True))
+    print(hybrid_res_1.compute())
+    print(hybrid_res_2.compute())
diff --git a/src/main/python/docs/source/guide/federated.rst
b/src/main/python/docs/source/guide/federated.rst
index 051ad03..fefea54 100644
--- a/src/main/python/docs/source/guide/federated.rst
+++ b/src/main/python/docs/source/guide/federated.rst
@@ -37,7 +37,7 @@ A simple guide to do this is in the SystemDS Repository_.
If that is setup correctly simply start a worker using the following command.
Here the ``8001`` refer to the port used by the worker.
-.. code-block:: python
+.. code-block::
systemds WORKER 8001
@@ -47,45 +47,22 @@ Simple Aggregation Example
In this example we use a single federated worker, and aggregate the sum of its
data.
First we need to create some data for our federated worker to use.
-In this example we simply use Numpy to create a ``test.csv`` file
-
-.. code-block:: python
-
- # Import numpy
- import numpy as np
- a = np.asarray([[1,2,3], [4,5,6], [7,8,9]])
- np.savetxt("temp/test.csv", a, delimiter=",")
+In this example we simply use Numpy to create a ``test.csv`` file.
Currently we also require a metadata file for the federated worker.
This should be located next to the ``test.csv`` file called ``test.csv.mtd``.
-To make this simply execute the following::
+To make both the data and metadata simply execute the following
- echo '{ "format":"csv", "header":false, "rows":3, "cols":3 }' >
temp/test.csv.mtd
+.. include:: ../code/federatedTutorial_part1.py
+ :start-line: 20
+ :code: python
-After creating our data we the federated worker becomes able to execute
federated instructions.
+After creating our data the federated worker becomes able to execute federated
instructions.
The aggregated sum using federated instructions in python SystemDS is done as
follows
-.. code-block:: python
-
- # Import numpy and SystemDS
- import numpy as np
- from systemds.context import SystemDSContext
-
- # Create a federated matrix
- ## Indicate the dimensions of the data:
- ### Here the first list in the tuple is the top left Coordinate,
- ### and the second the bottom left coordinate.
- ### It is ordered as [col,row].
- dims = ([0,0], [3,3])
-
- ## Specify the address + file path from worker:
- address = "localhost:8001/temp/test.csv"
-
- with SystemDSContext() as sds:
- fed_a = sds.federated([address], [dims])
- # Sum the federated matrix and call compute to execute
- print(fed_a.sum().compute())
- # Result should be 45.
+.. include:: ../code/federatedTutorial_part2.py
+ :start-line: 20
+ :code: python
Multiple Federated Environments
-------------------------------
@@ -96,7 +73,7 @@ Using the data created from the last example we can simulate
multiple federated workers by starting multiple ones on different ports.
Start with 3 different terminals, and run one federated environment in each.
-.. code-block:: python
+.. code-block::
systemds WORKER 8001
systemds WORKER 8002
@@ -104,35 +81,13 @@ Start with 3 different terminals, and run one federated
environment in each.
Once all three workers are up and running we can leverage all three in the
following example
-.. code-block:: python
-
- import numpy as np
- from systemds.context import SystemDSContext
-
- addr1 = "localhost:8001/temp/test.csv"
- addr2 = "localhost:8002/temp/test.csv"
- addr3 = "localhost:8003/temp/test.csv"
-
- # Create a federated matrix using two federated environments
- # Note that the two federated matrices are stacked on top of each other
-
- with SystemDSContext() as sds:
- fed_a = sds.federated(
- [addr1, addr2],
- [([0,0], [3,3]), ([0,3], [3,6])])
-
- fed_b = sds.federated(
- [addr1, addr3],
- [([0,0], [3,3]), ([0,3], [3,6])])
-
- # Multiply, compute and print.
- res = (fed_a * fed_b).compute()
-
- print(res)
+.. include:: ../code/federatedTutorial_part3.py
+ :start-line: 20
+ :code: python
The print should look like
-.. code-block:: python
+.. code-block::
[[ 1. 4. 9. 1. 4. 9.]
[16. 25. 36. 16. 25. 36.]
diff --git a/src/main/python/tests/federated/runFedTest.sh
b/src/main/python/tests/federated/runFedTest.sh
index b34ca99..90e24a5 100755
--- a/src/main/python/tests/federated/runFedTest.sh
+++ b/src/main/python/tests/federated/runFedTest.sh
@@ -34,6 +34,7 @@ mkdir -p $workerdir
mkdir -p $outputdir
w1_Output="$workerdir/w1"
w2_Output="$workerdir/w2"
+w3_Output="$workerdir/w3"
log="$outputdir/out.log"
# Make the workers start quietly and pipe their output to a file to print later
@@ -42,12 +43,15 @@ systemds WORKER 8001 >$w1_Output 2>&1 &
Fed1=$!
systemds WORKER 8002 >$w2_Output 2>&1 &
Fed2=$!
+systemds WORKER 8003 >$w3_Output 2>&1 &
+Fed3=$!
echo "Starting workers" && sleep 3 && echo "Starting tests"
# Run test
python -m unittest discover -s tests/federated -p 'test_*.py' $1 >$log 2>&1
pkill -P $Fed1
pkill -P $Fed2
+pkill -P $Fed3
# Print output
echo -e "\n---------------\nWorkers Output:\n---------------"
@@ -55,6 +59,8 @@ echo -e "\nWorker 1:"
cat $w1_Output
echo -e "\nWorker 2:"
cat $w2_Output
+echo -e "\nWorker 3:"
+cat $w3_Output
echo -e "\n------------\nTest output:\n------------"
cat $log
grepvals="$(tail -n 10 $log | grep OK)"
diff --git a/src/main/python/tests/federated/test_federated_tutorial.py
b/src/main/python/tests/federated/test_federated_tutorial.py
new file mode 100644
index 0000000..d9efdd6
--- /dev/null
+++ b/src/main/python/tests/federated/test_federated_tutorial.py
@@ -0,0 +1,39 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import shutil
+import unittest
+
+
+class TestFederatedAggFn(unittest.TestCase):
+    """Run the federated tutorial snippets; importing a module executes it."""
+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree("temp", ignore_errors=True)  # may be absent if part1 failed
+
+    def test_part1(self):  # writes temp/test.csv and its metadata file
+        import docs.source.code.federatedTutorial_part1
+
+    def test_part2(self):  # sums a federated matrix held by the worker on 8001
+        import docs.source.code.federatedTutorial_part2
+
+    def test_part3(self):  # multiplies a local matrix with data on 8001-8003
+        import docs.source.code.federatedTutorial_part3