This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 154b9ff6cd [SYSTEMDS-3397] Python NN testExample
154b9ff6cd is described below
commit 154b9ff6cdc15a917b4714914171372f84eb82e3
Author: baunsgaard <[email protected]>
AuthorDate: Tue Jun 28 22:40:53 2022 +0200
[SYSTEMDS-3397] Python NN testExample
A simple Python example of neural network training and preprocessing.
Two different scenarios are tested:
1. Train and measure accuracy in one go.
2. Train and save a model, then load it again and predict.
Closes #1648
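For orientation, the two scenarios boil down to the following Python API calls (a minimal sketch based on the new test file below; `train_x`, `train_y`, `test_x`, `test_y` stand in for the encoded Adult data prepared by the test's `prepare()` helper and are not defined here):

```python
# Sketch only: data preparation (transform_encode, one-hot labels, scaling)
# is elided; see prepare() in test_adult_neural.py below.
from systemds.context import SystemDSContext

sds = SystemDSContext()
fnn = sds.source("tests/examples/tutorials/neural_net_source.dml", "fnn")

# Scenario 1: train and measure accuracy in one go
network = fnn.train(train_x, train_y, 4, 16, 0.01, 1)
accuracy = fnn.eval(fnn.predict(test_x, network), test_y).compute()

# Scenario 2: train and save a model, then load it again and predict
fnn.train(train_x, train_y, 4, 16, 0.01, 1).write("model/fnn").compute()
loaded = sds.read("model/fnn")
probs = fnn.predict(test_x, loaded)
sds.close()
```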
---
src/main/python/.gitignore | 5 +-
src/main/python/tests/README.md | 4 +
.../tests/examples/tutorials/neural_net_source.dml | 129 +++++-------
.../python/tests/examples/tutorials/test_adult.py | 217 +--------------------
.../tests/examples/tutorials/test_adult_neural.py | 130 ++++++++++++
5 files changed, 191 insertions(+), 294 deletions(-)
diff --git a/src/main/python/.gitignore b/src/main/python/.gitignore
index f60a72e9ad..ad5ef4dc44 100644
--- a/src/main/python/.gitignore
+++ b/src/main/python/.gitignore
@@ -1,5 +1,4 @@
-
# Git ignore for python files.
systemds/lib/
systemds.egg-info/
@@ -15,10 +14,10 @@ tests/onnx_systemds/output_test
tests/onnx_systemds/dml_output
tests/onnx_systemds/test_models/*.onnx
-# git ignore tmp federated files
+# git ignore tmp test files
tests/federated/output
tests/federated/worker
tests/federated/tmp
-
tests/list/tmp
tests/algorithms/readwrite/
+tests/examples/tutorials/model
diff --git a/src/main/python/tests/README.md b/src/main/python/tests/README.md
index bf6b6e35f0..24e0f01863 100644
--- a/src/main/python/tests/README.md
+++ b/src/main/python/tests/README.md
@@ -24,7 +24,11 @@ Tests are easily executed using unittest:
But before executing the tests, it is recommended to go through the SystemDS
[Setting SYSTEMDS_ROOT environment](/bin/README.md) setup guide.
```bash
+# Single thread:
python -m unittest discover -s tests -p 'test_*.py'
+
+# Parallel:
+unittest-parallel -t . -s tests --module-fixtures
```
This command searches through the test directory and finds all python files
starting with `test_` and executes them.
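Note that `unittest-parallel` is a separate PyPI package (typically installed with `pip install unittest-parallel`), whereas the single-threaded command needs only the standard library.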
diff --git a/src/main/python/tests/examples/tutorials/neural_net_source.dml b/src/main/python/tests/examples/tutorials/neural_net_source.dml
index 037ba7891b..8615f04991 100644
--- a/src/main/python/tests/examples/tutorials/neural_net_source.dml
+++ b/src/main/python/tests/examples/tutorials/neural_net_source.dml
@@ -21,9 +21,10 @@
# Imports
source("nn/layers/affine.dml") as affine
-source("nn/layers/logcosh_loss.dml") as logcosh
-source("nn/layers/elu.dml") as elu
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/relu.dml") as relu
source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/softmax.dml") as softmax
source("nn/optim/sgd.dml") as sgd
init_model = function(Integer inputDimension, Integer outputDimension, int seed = -1)
@@ -36,26 +37,23 @@ init_model = function(Integer inputDimension, Integer outputDimension, int seed
model = list(W1, W2, W3, b1, b2, b3)
}
-
predict = function(matrix[double] X,
list[unknown] model)
return (matrix[double] probs) {
- W1 = as.matrix(model[1])
- W2 = as.matrix(model[2])
- W3 = as.matrix(model[3])
- b1 = as.matrix(model[4])
- b2 = as.matrix(model[5])
- b3 = as.matrix(model[6])
+ W1 = as.matrix(model[1]); b1 = as.matrix(model[4])
+ W2 = as.matrix(model[2]); b2 = as.matrix(model[5])
+ W3 = as.matrix(model[3]); b3 = as.matrix(model[6])
- out1elu = elu::forward(affine::forward(X, W1, b1),1)
- out2elu = elu::forward(affine::forward(out1elu, W2, b2),1)
- probs = elu::forward(affine::forward(out2elu, W3, b3),1)
+ out1a = sigmoid::forward(affine::forward(X, W1, b1))
+ out2a = relu::forward(affine::forward(out1a, W2, b2))
+ probs = softmax::forward(affine::forward(out2a, W3, b3))
}
eval = function(matrix[double] probs, matrix[double] y)
- return (double loss) {
- loss = logcosh::forward(probs, y)
+ return (double accuracy) {
+ correct_pred = rowIndexMax(probs) == rowIndexMax(y)
+ accuracy = mean(correct_pred)
}
gradients = function(list[unknown] model,
@@ -64,33 +62,31 @@ gradients = function(list[unknown] model,
matrix[double] labels)
return (list[unknown] gradients) {
- W1 = as.matrix(model[1])
- W2 = as.matrix(model[2])
- W3 = as.matrix(model[3])
- b1 = as.matrix(model[4])
- b2 = as.matrix(model[5])
- b3 = as.matrix(model[6])
-
+ W1 = as.matrix(model[1]); b1 = as.matrix(model[4])
+ W2 = as.matrix(model[2]); b2 = as.matrix(model[5])
+ W3 = as.matrix(model[3]); b3 = as.matrix(model[6])
+
# Compute forward pass
out1 = affine::forward(features, W1, b1)
- out1elu = elu::forward(out1, 1)
- out2 = affine::forward(out1elu, W2, b2)
- out2elu = elu::forward(out2, 1)
- out3 = affine::forward(out2elu, W3, b3)
- probs = elu::forward(out3,1)
+ out1a = sigmoid::forward(out1)
+ out2 = affine::forward(out1a, W2, b2)
+ out2a = relu::forward(out2)
+ out3 = affine::forward(out2a, W3, b3)
+ probs = softmax::forward(out3)
# Compute loss & accuracy for training data
- loss = logcosh::forward(probs, labels)
+ loss = cross_entropy_loss::forward(probs, labels)
print("Batch loss: " + loss)
# Compute data backward pass
- dprobs = logcosh::backward(probs, labels)
- dout3 = elu::backward(dprobs, out3, 1)
- [dout2elu, dW3, db3] = affine::backward(dout3, out2elu, W3, b3)
- dout2 = elu::backward(dout2elu, out2, 1)
- [dout1elu, dW2, db2] = affine::backward(dout2, out1elu, W2, b2)
- dout1 = elu::backward(dout1elu, out1, 1)
- [dfeatures, dW1, db1] = affine::backward(dout1, features, W1, b1)
+ # Note: each backward call takes the same arguments as its forward call, plus the incoming gradient as an extra first argument
+ dloss = cross_entropy_loss::backward(probs, labels)
+ dout3 = softmax::backward(dloss, out3)
+ [dout2a, dW3, db3] = affine::backward(dout3, out2a, W3, b3)
+ dout2 = relu::backward(dout2a, out2)
+ [dout1a, dW2, db2] = affine::backward(dout2, out1a, W2, b2)
+ dout1 = sigmoid::backward(dout1a, out1)
+ [a, dW1, db1] = affine::backward(dout1, features, W1, b1)
gradients = list(dW1, dW2, dW3, db1, db2, db3)
}
@@ -100,18 +96,13 @@ aggregation = function(list[unknown] model,
list[unknown] gradients)
return (list[unknown] model_result) {
- W1 = as.matrix(model[1])
- W2 = as.matrix(model[2])
- W3 = as.matrix(model[3])
- b1 = as.matrix(model[4])
- b2 = as.matrix(model[5])
- b3 = as.matrix(model[6])
- dW1 = as.matrix(gradients[1])
- dW2 = as.matrix(gradients[2])
- dW3 = as.matrix(gradients[3])
- db1 = as.matrix(gradients[4])
- db2 = as.matrix(gradients[5])
- db3 = as.matrix(gradients[6])
+ W1 = as.matrix(model[1]); dW1 = as.matrix(gradients[1])
+ W2 = as.matrix(model[2]); dW2 = as.matrix(gradients[2])
+ W3 = as.matrix(model[3]); dW3 = as.matrix(gradients[3])
+ b1 = as.matrix(model[4]); db1 = as.matrix(gradients[4])
+ b2 = as.matrix(model[5]); db2 = as.matrix(gradients[5])
+ b3 = as.matrix(model[6]); db3 = as.matrix(gradients[6])
+
learning_rate = as.double(as.scalar(hyperparams["learning_rate"]))
# Optimize with SGD
@@ -125,7 +116,6 @@ aggregation = function(list[unknown] model,
model_result = list(W1, W2, W3, b1, b2, b3)
}
-
train = function(matrix[double] X, matrix[double] y,
int epochs, int batch_size, double learning_rate,
int seed = -1)
@@ -136,12 +126,9 @@ train = function(matrix[double] X, matrix[double] y,
K = ncol(y) # num classes
model = init_model(D, K, seed)
- W1 = as.matrix(model[1])
- W2 = as.matrix(model[2])
- W3 = as.matrix(model[3])
- b1 = as.matrix(model[4])
- b2 = as.matrix(model[5])
- b3 = as.matrix(model[6])
+ W1 = as.matrix(model[1]); b1 = as.matrix(model[4])
+ W2 = as.matrix(model[2]); b2 = as.matrix(model[5])
+ W3 = as.matrix(model[3]); b3 = as.matrix(model[6])
# Create the hyper parameter list
hyperparams = list(learning_rate=learning_rate)
@@ -163,13 +150,10 @@ train = function(matrix[double] X, matrix[double] y,
gradients_list = gradients(model_list, hyperparams, X_batch, y_batch)
model_updated = aggregation(model_list, hyperparams, gradients_list)
- W1 = as.matrix(model_updated[1])
- W2 = as.matrix(model_updated[2])
- W3 = as.matrix(model_updated[3])
- b1 = as.matrix(model_updated[4])
- b2 = as.matrix(model_updated[5])
- b3 = as.matrix(model_updated[6])
-
+ W1 = as.matrix(model_updated[1]); b1 = as.matrix(model_updated[4])
+ W2 = as.matrix(model_updated[2]); b2 = as.matrix(model_updated[5])
+ W3 = as.matrix(model_updated[3]); b3 = as.matrix(model_updated[6])
+
}
}
@@ -178,9 +162,13 @@ train = function(matrix[double] X, matrix[double] y,
train_paramserv = function(matrix[Double] X, matrix[Double] y,
Integer epochs, Integer batch_size, Double learning_rate, Integer workers,
- String utype, String freq, String mode, Integer seed)
+ Integer seed)
return (list[unknown] model_trained) {
+ utype = "BSP"
+ freq = "BATCH"
+ mode = "LOCAL"
+
N = nrow(X) # num examples
D = ncol(X) # num features
K = ncol(y) # num classes
@@ -194,24 +182,9 @@ train_paramserv = function(matrix[Double] X, matrix[Double] y,
# Use paramserv function
model_trained = paramserv(model=model_list, features=X, labels=y,
val_features=matrix(0, rows=0, cols=0), val_labels=matrix(0, rows=0, cols=0),
- upd="./network/TwoNN.dml::gradients",
agg="./network/TwoNN.dml::aggregation",
+ upd="./tests/examples/tutorials/neural_net_source.dml::gradients",
+ agg="./tests/examples/tutorials/neural_net_source.dml::aggregation",
mode=mode, utype=utype, freq=freq, epochs=epochs, batchsize=batch_size,
k=workers, hyperparams=params, checkpointing="NONE")
}
-
-save_model = function (list[unknown] model, String baseFolder){
- W1 = as.matrix(model[1])
- W2 = as.matrix(model[2])
- W3 = as.matrix(model[3])
- b1 = as.matrix(model[4])
- b2 = as.matrix(model[5])
- b3 = as.matrix(model[6])
-
- write(W1, (baseFolder + "/W1.data"), format="binary")
- write(W2, (baseFolder + "/W2.data"), format="binary")
- write(W3, (baseFolder + "/W3.data"), format="binary")
- write(b1, (baseFolder + "/b1.data"), format="binary")
- write(b2, (baseFolder + "/b2.data") , format="binary")
- write(b3, (baseFolder + "/b3.data") , format="binary")
-}
\ No newline at end of file
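With this change the network is a plain affine → sigmoid → affine → relu → affine → softmax stack trained against a cross-entropy loss, and `eval` now reports argmax accuracy instead of a loss. A rough NumPy equivalent of the new forward pass and metric (illustrative only, not SystemDS code; the parameters `W1..W3`, `b1..b3` are assumed given):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def relu(x):
    return np.maximum(x, 0.0)

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))  # shifted for stability
    return e / e.sum(axis=1, keepdims=True)

def predict(X, W1, b1, W2, b2, W3, b3):
    # affine -> sigmoid -> affine -> relu -> affine -> softmax
    out1a = sigmoid(X @ W1 + b1)
    out2a = relu(out1a @ W2 + b2)
    return softmax(out2a @ W3 + b3)

def evaluate(probs, y_one_hot):
    # mirrors eval(): fraction of rows whose argmax matches the label
    return np.mean(probs.argmax(axis=1) == y_one_hot.argmax(axis=1))
```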
diff --git a/src/main/python/tests/examples/tutorials/test_adult.py b/src/main/python/tests/examples/tutorials/test_adult.py
index ddafc96e28..d327676977 100644
--- a/src/main/python/tests/examples/tutorials/test_adult.py
+++ b/src/main/python/tests/examples/tutorials/test_adult.py
@@ -18,20 +18,17 @@
# under the License.
#
# -------------------------------------------------------------
-import os
+
import unittest
import numpy as np
from systemds.context import SystemDSContext
from systemds.examples.tutorials.adult import DataManager
-from systemds.operator import Frame, Matrix, OperationNode
-from systemds.operator.algorithm import (confusionMatrix, kmeans, l2svm,
- multiLogReg, multiLogRegPredict,
- scale, scaleApply, split, winsorize)
-from systemds.script_building import DMLScript
+from systemds.operator.algorithm import (confusionMatrix,
+ multiLogReg, multiLogRegPredict)
-class Test_DMLScript(unittest.TestCase):
+class TestAdultStandardML(unittest.TestCase):
"""
Test class for adult dml script tutorial code.
"""
@@ -152,212 +149,6 @@ class Test_DMLScript(unittest.TestCase):
self.assertTrue(confusion_numpy[1][1] > 0.5)
self.assertTrue(confusion_numpy[1][0] < 0.2)
- # def test_neural_net(self):
- # # Reduced because we want the tests to finish a bit faster.
- # train_count = 15000
- # test_count = 5000
-
- # train_data, train_labels, test_data, test_labels = self.d.get_preprocessed_dataset(interpolate=True, standardize=True, dimred=0.1)
-
- # # Train data
- # X = self.sds.from_numpy( train_data[:train_count])
- # Y = self.sds.from_numpy( train_labels[:train_count])
-
- # # Test data
- # Xt = self.sds.from_numpy(test_data[:test_count])
- # Yt = self.sds.from_numpy(test_labels[:test_count])
-
- # FFN_package = self.sds.source(self.neural_net_src_path, "fnn", print_imported_methods=True)
-
- # network = FFN_package.train(X, Y, 1, 16, 0.01, 1)
-
- # self.assertTrue(type(network) is not None) # sourcing and training seems to works
-
- # FFN_package.save_model(network, '"model/python_FFN/"').compute(verbose=True)
-
- # # TODO This does not work yet, not sure what the problem is
- # #probs = FFN_package.predict(Xt, network).compute(True)
- # # FFN_package.eval(Yt, Yt).compute()
-
- # def test_level1(self):
- # # Reduced because we want the tests to finish a bit faster.
- # train_count = 15000
- # test_count = 5000
- # train_data, train_labels, test_data, test_labels = self.d.get_preprocessed_dataset(interpolate=True,
- #     standardize=True, dimred=0.1)
- # # Train data
- # X = self.sds.from_numpy(train_data[:train_count])
- # Y = self.sds.from_numpy(train_labels[:train_count])
- # Y = Y + 1.0
-
- # # Test data
- # Xt = self.sds.from_numpy(test_data[:test_count])
- # Yt = self.sds.from_numpy(test_labels[:test_count])
- # Yt = Yt + 1.0
-
- # betas = multiLogReg(X, Y)
-
- # [_, y_pred, acc] = multiLogRegPredict(Xt, betas, Yt).compute()
- # self.assertGreater(acc, 80) #Todo remove?
- # # todo add text how high acc should be with this config
-
- # confusion_matrix_abs, _ = confusionMatrix(self.sds.from_numpy(y_pred), Yt).compute()
- # # todo print confusion matrix? Explain cm?
- # self.assertTrue(
- # np.allclose(
- # confusion_matrix_abs,
- # np.array([[3583, 502],
- # [245, 670]])
- # )
- # )
-
- # def test_level2(self):
-
- # train_count = 32561
- # test_count = 16281
-
- # SCHEMA = '"DOUBLE,STRING,DOUBLE,STRING,DOUBLE,STRING,STRING,STRING,STRING,STRING,DOUBLE,DOUBLE,DOUBLE,STRING,STRING"'
-
- # F1 = self.sds.read(
- # self.dataset_path_train,
- # schema=SCHEMA
- # )
- # F2 = self.sds.read(
- # self.dataset_path_test,
- # schema=SCHEMA
- # )
-
- # jspec = self.sds.read(self.dataset_jspec, data_type="scalar", value_type="string")
- # PREPROCESS_package = self.sds.source(self.preprocess_src_path, "preprocess", print_imported_methods=True)
-
- # X1 = F1.rbind(F2)
- # X1, M1 = X1.transform_encode(spec=jspec)
-
- # X = PREPROCESS_package.get_X(X1, 1, train_count)
- # Y = PREPROCESS_package.get_Y(X1, 1, train_count)
-
- # Xt = PREPROCESS_package.get_X(X1, train_count, train_count+test_count)
- # Yt = PREPROCESS_package.get_Y(X1, train_count, train_count+test_count)
-
- # Yt = PREPROCESS_package.replace_value(Yt, 3.0, 1.0)
- # Yt = PREPROCESS_package.replace_value(Yt, 4.0, 2.0)
-
- # # better alternative for encoding. This was intended, but it does not work
- # #F2 = F2.replace("<=50K.", "<=50K")
- # #F2 = F2.replace(">50K.", ">50K")
- # #X1, M = F1.transform_encode(spec=jspec)
- # #X2 = F2.transform_apply(spec=jspec, meta=M)
-
- # #X = PREPROCESS_package.get_X(X1, 1, train_count)
- # #Y = PREPROCESS_package.get_Y(X1, 1, train_count)
- # #Xt = PREPROCESS_package.get_X(X2, 1, test_count)
- # #Yt = PREPROCESS_package.get_Y(X2, 1, test_count)
-
- # # TODO somehow throws error at predict with this included
- # #X, mean, sigma = scale(X, True, True)
- # #Xt = scaleApply(Xt, mean, sigma)
-
- # betas = multiLogReg(X, Y)
-
- # [_, y_pred, acc] = multiLogRegPredict(Xt, betas, Yt)
-
- # confusion_matrix_abs, _ = confusionMatrix(y_pred, Yt).compute()
- # print(confusion_matrix_abs)
- # self.assertTrue(
- # np.allclose(
- # confusion_matrix_abs,
- # np.array([[11593., 1545.],
- # [842., 2302.]])
- # )
- # )
-
- # def test_level3(self):
- # train_count = 32561
- # test_count = 16281
-
- # SCHEMA = '"DOUBLE,STRING,DOUBLE,STRING,DOUBLE,STRING,STRING,STRING,STRING,STRING,DOUBLE,DOUBLE,DOUBLE,STRING,STRING"'
-
- # F1 = self.sds.read(
- # self.dataset_path_train,
- # schema=SCHEMA
- # )
- # F2 = self.sds.read(
- # self.dataset_path_test,
- # schema=SCHEMA
- # )
-
- # jspec = self.sds.read(self.dataset_jspec, data_type="scalar", value_type="string")
- # PREPROCESS_package = self.sds.source(self.preprocess_src_path, "preprocess", print_imported_methods=True)
-
- # X1 = F1.rbind(F2)
- # X1, M1 = X1.transform_encode(spec=jspec)
-
- # X = PREPROCESS_package.get_X(X1, 1, train_count)
- # Y = PREPROCESS_package.get_Y(X1, 1, train_count)
-
- # Xt = PREPROCESS_package.get_X(X1, train_count, train_count + test_count)
- # Yt = PREPROCESS_package.get_Y(X1, train_count, train_count + test_count)
-
- # Yt = PREPROCESS_package.replace_value(Yt, 3.0, 1.0)
- # Yt = PREPROCESS_package.replace_value(Yt, 4.0, 2.0)
-
- # # better alternative for encoding
- # # F2 = F2.replace("<=50K.", "<=50K")
- # # F2 = F2.replace(">50K.", ">50K")
- # # X1, M = F1.transform_encode(spec=jspec)
- # # X2 = F2.transform_apply(spec=jspec, meta=M)
-
- # # X = PREPROCESS_package.get_X(X1, 1, train_count)
- # # Y = PREPROCESS_package.get_Y(X1, 1, train_count)
- # # Xt = PREPROCESS_package.get_X(X2, 1, test_count)
- # # Yt = PREPROCESS_package.get_Y(X2, 1, test_count)
-
- # # TODO somehow throws error at predict with this included
- # # X, mean, sigma = scale(X, True, True)
- # # Xt = scaleApply(Xt, mean, sigma)
-
- # FFN_package = self.sds.source(self.neural_net_src_path, "fnn", print_imported_methods=True)
-
- # epochs = 1
- # batch_size = 16
- # learning_rate = 0.01
- # seed = 42
-
- # network = FFN_package.train(X, Y, epochs, batch_size, learning_rate, seed)
-
- # """
- # If more ressources are available, one can also choose to train the model using a parameter server.
- # Here we use the same parameters as before, however we need to specifiy a few more.
- # """
- # ################################################################################################################
- # # workers = 1
- # # utype = '"BSP"'
- # # freq = '"EPOCH"'
- # # mode = '"LOCAL"'
- # # network = FFN_package.train_paramserv(X, Y, epochs,
- # #     batch_size, learning_rate, workers, utype, freq, mode,
- # #     seed)
- # ################################################################################################################
-
- # FFN_package.save_model(network, '"model/python_FFN/"').compute(verbose=True)
-
- # """
- # Next we evaluate our network on the test set which was not used for training.
- # The predict function with the test features and our trained network returns a matrix of class probabilities.
- # This matrix contains for each test sample the probabilities for each class.
- # For predicting the most likely class of a sample, we choose the class with the highest probability.
- # """
- # ################################################################################################################
- # #probs = FFN_package.predict(Xt, network)
- # ################################################################################################################
- # """
- # To evaluate how well our model performed on the test set, we can use the probability matrix from the predict call and the real test labels
- # and compute the log-cosh loss.
- # """
- # ################################################################################################################
- # #FFN_package.eval(Xt, Yt).compute(True)
- # ################################################################################################################
-
if __name__ == "__main__":
unittest.main(exit=False)
diff --git a/src/main/python/tests/examples/tutorials/test_adult_neural.py b/src/main/python/tests/examples/tutorials/test_adult_neural.py
new file mode 100644
index 0000000000..1323ff22cf
--- /dev/null
+++ b/src/main/python/tests/examples/tutorials/test_adult_neural.py
@@ -0,0 +1,130 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import shutil
+import unittest
+
+from systemds.context import SystemDSContext
+from systemds.examples.tutorials.adult import DataManager
+from systemds.operator.algorithm.builtin.scale import scale
+from systemds.operator.algorithm.builtin.scaleApply import scaleApply
+
+
+class TestAdultNeural(unittest.TestCase):
+ """
+ Test class for adult neural network code
+ """
+
+ sds: SystemDSContext = None
+ d: DataManager = None
+ neural_net_src_path: str = "tests/examples/tutorials/neural_net_source.dml"
+ preprocess_src_path: str = "tests/examples/tutorials/preprocess.dml"
+ dataset_path_train: str = "../../test/resources/datasets/adult/train_data.csv"
+ dataset_path_train_mtd: str = "../../test/resources/datasets/adult/train_data.csv.mtd"
+ dataset_path_test: str = "../../test/resources/datasets/adult/test_data.csv"
+ dataset_path_test_mtd: str = "../../test/resources/datasets/adult/test_data.csv.mtd"
+ dataset_jspec: str = "../../test/resources/datasets/adult/jspec.json"
+
+ train_count: int = 15000
+ test_count: int = 300
+
+ network_dir: str = "tests/examples/tutorials/model"
+ network: str = network_dir + "/fnn"
+
+ @classmethod
+ def setUpClass(cls):
+ cls.sds = SystemDSContext()
+ cls.d = DataManager()
+ shutil.rmtree(cls.network_dir, ignore_errors=True)
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.sds.close()
+ shutil.rmtree(cls.network_dir, ignore_errors=True)
+
+ # Tests
+
+ def test_train_neural_net(self):
+ self.train_neural_net_and_save()
+ self.eval_neural_net()
+
+ def test_train_predict(self):
+ self.train_neural_net_and_predict()
+
+ # Helper methods
+
+ def prepare_x(self):
+ jspec = self.d.get_jspec(self.sds)
+ train_x_frame = self.d.get_train_data(self.sds)[0:self.train_count]
+ train_x, M1 = train_x_frame.transform_encode(spec=jspec)
+ test_x_frame = self.d.get_test_data(self.sds)[0:self.test_count]
+ test_x = test_x_frame.transform_apply(spec=jspec, meta=M1)
+ # Scale and shift: not strictly needed because of the sigmoid layer,
+ # but potentially useful, therefore tested here.
+ [train_x, ce, sc] = scale(train_x)
+ test_x = scaleApply(test_x, ce, sc)
+ return [train_x, test_x]
+
+ def prepare_y(self):
+ jspec_dict = {"recode": ["income"]}
+ jspec_labels = self.sds.scalar(f'"{jspec_dict}"')
+ train_y_frame = self.d.get_train_labels(self.sds)[0:self.train_count]
+ train_y, M2 = train_y_frame.transform_encode(spec=jspec_labels)
+ test_y_frame = self.d.get_test_labels(self.sds)[0:self.test_count]
+ test_y = test_y_frame.transform_apply(spec=jspec_labels, meta=M2)
+ labels = 2
+ train_y = train_y.to_one_hot(labels)
+ test_y = test_y.to_one_hot(labels)
+ return [train_y, test_y]
+
+ def prepare(self):
+ x = self.prepare_x()
+ y = self.prepare_y()
+ return [x[0], x[1], y[0], y[1]]
+
+ def train_neural_net_and_save(self):
+ [train_x, _, train_y, _] = self.prepare()
+ FFN_package = self.sds.source(self.neural_net_src_path, "fnn")
+ network = FFN_package.train(train_x, train_y, 4, 16, 0.01, 1)
+ network.write(self.network).compute()
+
+ def train_neural_net_and_predict(self):
+ [train_x, test_x, train_y, test_y] = self.prepare()
+ FFN_package = self.sds.source(self.neural_net_src_path, "fnn")
+ network = FFN_package.train_paramserv(
+ train_x, train_y, 4, 16, 0.01, 2, 1)
+ probs = FFN_package.predict(test_x, network)
+ accuracy = FFN_package.eval(probs, test_y).compute()
+ # accuracy is returned as a fraction in [0, 1]
+ self.assertTrue(accuracy > 0.80)
+
+ def eval_neural_net(self):
+ [_, test_x, _, test_y] = self.prepare()
+ network = self.sds.read(self.network)
+ FFN_package = self.sds.source(self.neural_net_src_path, "fnn")
+ probs = FFN_package.predict(test_x, network)
+ accuracy = FFN_package.eval(probs, test_y).compute()
+ # accuracy is returned as a fraction in [0, 1]
+ self.assertTrue(accuracy > 0.80)
+
+
+if __name__ == "__main__":
+ unittest.main(exit=False)
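To run just this new module, a minimal sketch (assuming the working directory is src/main/python so the relative test paths resolve):

```python
import unittest

# Discover and run only the new neural-network tutorial tests.
suite = unittest.defaultTestLoader.discover(
    "tests/examples/tutorials", pattern="test_adult_neural.py")
unittest.TextTestRunner(verbosity=2).run(suite)
```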