http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/test/grad_check.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml deleted file mode 100644 index f3bc9a7..0000000 --- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml +++ /dev/null @@ -1,1769 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -/* - * Gradient checks for various architectures. - */ -source("nn/layers/affine.dml") as affine -source("nn/layers/batch_norm1d.dml") as batch_norm1d -source("nn/layers/batch_norm2d.dml") as batch_norm2d -source("nn/layers/conv2d.dml") as conv2d -source("nn/layers/conv2d_builtin.dml") as conv2d_builtin -source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss -source("nn/layers/dropout.dml") as dropout -source("nn/layers/l1_loss.dml") as l1_loss -source("nn/layers/l1_reg.dml") as l1_reg -source("nn/layers/l2_loss.dml") as l2_loss -source("nn/layers/l2_reg.dml") as l2_reg -source("nn/layers/log_loss.dml") as log_loss -source("nn/layers/lstm.dml") as lstm -source("nn/layers/max_pool2d.dml") as max_pool2d -source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin -source("nn/layers/relu.dml") as relu -source("nn/layers/rnn.dml") as rnn -source("nn/layers/scale_shift1d.dml") as scale_shift1d -source("nn/layers/scale_shift2d.dml") as scale_shift2d -source("nn/layers/sigmoid.dml") as sigmoid -source("nn/layers/softmax.dml") as softmax -source("nn/layers/tanh.dml") as tanh -source("nn/test/conv2d_simple.dml") as conv2d_simple -source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple -source("nn/test/util.dml") as test_util - -affine = function() { - /* - * Gradient check for the affine layer. 
- */ - print("Grad checking the affine layer with L2 loss.") - - # Generate data - N = 3 # num examples - D = 100 # num features - M = 10 # num neurons - X = rand(rows=N, cols=D) - y = rand(rows=N, cols=M) - [W, b] = affine::init(D, M) - - # Compute analytical gradients of loss wrt parameters - out = affine::forward(X, W, b) - dout = l2_loss::backward(out, y) - [dX, dW, db] = affine::backward(dout, X, W, b) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - outmh = affine::forward(X, W, b) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - outph = affine::forward(X, W, b) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking W.") - for (i in 1:nrow(W)) { - for (j in 1:ncol(W)) { - # Compute numerical derivative - old = as.scalar(W[i,j]) - W[i,j] = old - h - outmh = affine::forward(X, W, b) - lossmh = l2_loss::forward(outmh, y) - W[i,j] = old + h - outph = affine::forward(X, W, b) - lossph = l2_loss::forward(outph, y) - W[i,j] = old # reset - dW_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) - } - } - - print(" - Grad checking b.") - for (i in 1:nrow(b)) { - for (j in 1:ncol(b)) { - # Compute numerical derivative - old = as.scalar(b[i,j]) - b[i,j] = old - h - outmh = affine::forward(X, W, b) - lossmh = l2_loss::forward(outmh, y) - b[i,j] = old + h - outph = affine::forward(X, W, b) - lossph = l2_loss::forward(outph, y) - b[i,j] = old # reset - db_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh) - } - } -} - -batch_norm1d = function() { - /* - * Gradient check for the 1D batch normalization layer. 
- */ - print("Grad checking the 1D batch normalization layer with L2 loss.") - - # Generate data - N = 3 # num examples - D = 100 # num features - mu = 0.9 # momentum - eps = 1e-5 # epsilon - X = rand(rows=N, cols=D) - y = rand(rows=N, cols=D) - gamma = rand(rows=1, cols=D) - beta = rand(rows=1, cols=D) - ema_mean = rand(rows=1, cols=D) - ema_var = rand(rows=1, cols=D) - #[dummy, dummy, ema_mean, ema_var] = batch_norm1d::init(D) - - # Check training & testing modes - for (i in 1:2) { - if (i == 1) - mode = 'train' - else - mode = 'test' - print(" - Grad checking the '"+mode+"' mode.") - - # Compute analytical gradients of loss wrt parameters - [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps) - dout = l2_loss::backward(out, y) - [dX, dgamma, dbeta] = batch_norm1d::backward(dout, out, ema_mean_upd, ema_var_upd, - cache_mean, cache_var, cache_norm, - X, gamma, beta, mode, ema_mean, ema_var, mu, eps) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking gamma.") - for (i in 1:nrow(gamma)) { - for (j in 1:ncol(gamma)) { - # Compute numerical derivative - old = as.scalar(gamma[i,j]) - gamma[i,j] = old - h - [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps) - lossmh = l2_loss::forward(outmh, y) - gamma[i,j] = old + h - [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps) - lossph = l2_loss::forward(outph, y) - gamma[i,j] = old # reset - dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num, - lossph, lossmh) - } - } - - print(" - Grad checking beta.") - for (i in 1:nrow(beta)) { - for (j in 1:ncol(beta)) { - # Compute numerical derivative - old = as.scalar(beta[i,j]) - beta[i,j] = old - h - [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps) - lossmh = l2_loss::forward(outmh, y) - beta[i,j] = old + h - [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps) - lossph = l2_loss::forward(outph, y) - beta[i,j] = old # reset - dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num, - lossph, lossmh) - } - } - } -} - -batch_norm2d = function() { - /* - * Gradient check for the 2D (spatial) batch normalization layer. 
- */ - print("Grad checking the 2D (spatial) batch normalization layer with L2 loss.") - - # Generate data - N = 3 # num examples - C = 2 # num channels - Hin = 5 # input height - Win = 5 # input width - mu = 0.9 # momentum - eps = 1e-5 # epsilon - X = rand(rows=N, cols=C*Hin*Win) - y = rand(rows=N, cols=C*Hin*Win) - gamma = rand(rows=C, cols=1) - beta = rand(rows=C, cols=1) - ema_mean = rand(rows=C, cols=1) - ema_var = rand(rows=C, cols=1) - #[dummy, dummy, ema_mean, ema_var] = batch_norm2d::init(C) - - # Check training & testing modes - for (i in 1:2) { - if (i == 1) - mode = 'train' - else - mode = 'test' - print(" - Grad checking the '"+mode+"' mode.") - - # Compute analytical gradients of loss wrt parameters - [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps) - dout = l2_loss::backward(out, y) - [dX, dgamma, dbeta] = batch_norm2d::backward(dout, out, ema_mean_upd, ema_var_upd, - cache_mean, cache_var, cache_norm, - X, gamma, beta, C, Hin, Win, mode, - ema_mean, ema_var, mu, eps) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking gamma.") - for (i in 1:nrow(gamma)) { - for (j in 1:ncol(gamma)) { - # Compute numerical derivative - old = as.scalar(gamma[i,j]) - gamma[i,j] = old - h - [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps) - lossmh = l2_loss::forward(outmh, y) - gamma[i,j] = old + h - [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps) - lossph = l2_loss::forward(outph, y) - gamma[i,j] = old # reset - dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num, - lossph, lossmh) - } - } - - print(" - Grad checking beta.") - for (i in 1:nrow(beta)) { - for (j in 1:ncol(beta)) { - # Compute numerical derivative - old = as.scalar(beta[i,j]) - beta[i,j] = old - h - [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps) - lossmh = l2_loss::forward(outmh, y) - beta[i,j] = old + h - [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = - batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps) - lossph = l2_loss::forward(outph, y) - beta[i,j] = old # reset - dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num, - lossph, lossmh) - } - } - } -} - -conv2d = 
function() { - /* - * Gradient check for the 2D convolutional layer using `im2col`. - */ - print("Grad checking the `im2col` 2D convolutional layer with L2 loss.") - - # Generate data - N = 2 # num examples - C = 2 # num channels - Hin = 5 # input height - Win = 5 # input width - F = 2 # num filters - Hf = 3 # filter height - Wf = 3 # filter width - stride = 1 - pad = 1 - X = rand(rows=N, cols=C*Hin*Win) - y = rand(rows=N, cols=F*Hin*Win) - - # Create layers - [W, b] = conv2d::init(F, C, Hf, Wf) - - # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - dout = l2_loss::backward(out, y) - [dX, dW, db] = conv2d::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking W.") - for (i in 1:nrow(W)) { - for (j in 1:ncol(W)) { - # Compute numerical derivative - old = as.scalar(W[i,j]) - W[i,j] = old - h - [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - lossmh = l2_loss::forward(outmh, y) - W[i,j] = old + h - [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - lossph = l2_loss::forward(outph, y) - W[i,j] = old # reset - dW_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) - } - } - - print(" - Grad checking b.") - for (i in 1:nrow(b)) { - for (j in 1:ncol(b)) { - # Compute numerical derivative - old = as.scalar(b[i,j]) - b[i,j] = old - h - [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - lossmh = l2_loss::forward(outmh, y) - b[i,j] = old + h - [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - lossph = l2_loss::forward(outph, y) - b[i,j] = old # reset - db_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh) - } - } -} - -conv2d_builtin = function() { - /* - * Gradient check for the 2D convolutional layer using built-in - * functions. 
- */ - print("Grad checking the built-in 2D convolutional layer with L2 loss.") - - # Generate data - N = 2 # num examples - C = 2 # num channels - Hin = 5 # input height - Win = 5 # input width - F = 2 # num filters - Hf = 3 # filter height - Wf = 3 # filter width - stride = 1 - pad = 1 - X = rand(rows=N, cols=C*Hin*Win) - y = rand(rows=N, cols=F*Hin*Win) - - # Create layers - [W, b] = conv2d_builtin::init(F, C, Hf, Wf) - - # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - dout = l2_loss::backward(out, y) - [dX, dW, db] = conv2d_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking W.") - for (i in 1:nrow(W)) { - for (j in 1:ncol(W)) { - # Compute numerical derivative - old = as.scalar(W[i,j]) - W[i,j] = old - h - [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossmh = l2_loss::forward(outmh, y) - W[i,j] = old + h - [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossph = l2_loss::forward(outph, y) - W[i,j] = old # reset - dW_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) - } - } - - print(" - Grad checking b.") - for (i in 1:nrow(b)) { - for (j in 1:ncol(b)) { - # Compute numerical derivative - old = as.scalar(b[i,j]) - b[i,j] = old - h - [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossmh = l2_loss::forward(outmh, y) - b[i,j] = old + h - [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossph = l2_loss::forward(outph, y) - b[i,j] = old # reset - db_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh) - } - } -} - -conv2d_simple = function() { - /* - * Gradient check for the simple reference 2D convolutional layer. 
- */ - print("Grad checking the simple reference 2D convolutional layer with L2 loss.") - - # Generate data - N = 2 # num examples - C = 2 # num channels - Hin = 5 # input height - Win = 5 # input width - F = 2 # num filters - Hf = 3 # filter height - Wf = 3 # filter width - stride = 1 - pad = 1 - X = rand(rows=N, cols=C*Hin*Win) - y = rand(rows=N, cols=F*Hin*Win) - - # Create layers - [W, b] = conv2d_simple::init(F, C, Hf, Wf) - - # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - dout = l2_loss::backward(out, y) - [dX, dW, db] = conv2d_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking W.") - for (i in 1:nrow(W)) { - for (j in 1:ncol(W)) { - # Compute numerical derivative - old = as.scalar(W[i,j]) - W[i,j] = old - h - [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossmh = l2_loss::forward(outmh, y) - W[i,j] = old + h - [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossph = l2_loss::forward(outph, y) - W[i,j] = old # reset - dW_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) - } - } - - print(" - Grad checking b.") - for (i in 1:nrow(b)) { - for (j in 1:ncol(b)) { - # Compute numerical derivative - old = as.scalar(b[i,j]) - b[i,j] = old - h - [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossmh = l2_loss::forward(outmh, y) - b[i,j] = old + h - [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossph = l2_loss::forward(outph, y) - b[i,j] = old # reset - db_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh) - } - } -} - -cross_entropy_loss = function() { - /* - * Gradient check for the cross-entropy loss function. 
- */ - print("Grad checking the cross-entropy loss function.") - - # Generate data - N = 3 # num examples - K = 10 # num targets - pred = rand(rows=N, cols=K, min=0, max=1, pdf="uniform") - pred = pred / rowSums(pred) # normalized probs - y = rand(rows=N, cols=K, min=0, max=1, pdf="uniform") - y = y / rowSums(y) # normalized probs - - # Compute analytical gradient - dpred = cross_entropy_loss::backward(pred, y) - - # Grad check - h = 1e-5 - for (i in 1:nrow(pred)) { - for (j in 1:ncol(pred)) { - # Compute numerical derivative - old = as.scalar(pred[i,j]) - pred[i,j] = old - h - lossmh = cross_entropy_loss::forward(pred, y) - pred[i,j] = old + h - lossph = cross_entropy_loss::forward(pred, y) - pred[i,j] = old # reset W[i,j] - dpred_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh) - } - } -} - -dropout = function() { - /* - * Gradient check for the (inverted) dropout layer. - */ - print("Grad checking the (inverted) dropout layer with L2 loss.") - - # Generate data - N = 3 # num examples - M = 100 # num neurons - p = 0.5 # probability of dropping neuron output - seed = as.integer(floor(as.scalar(rand(rows=1, cols=1, min=1, max=100000)))) # random seed - X = rand(rows=N, cols=M) - y = rand(rows=N, cols=M) - - # Compute analytical gradients of loss wrt parameters - [out, mask] = dropout::forward(X, p, seed) - dout = l2_loss::backward(out, y) - dX = dropout::backward(dout, X, p, mask) - - # Grad check - h = 1e-5 - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, mask] = dropout::forward(X, p, seed) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, mask] = dropout::forward(X, p, seed) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } -} - -l1_loss = function() { - /* - * Gradient check for the L1 loss function. - */ - print("Grad checking the L1 loss function.") - - # Generate data - N = 3 # num examples - D = 2 # num targets - pred = rand(rows=N, cols=D) - y = rand(rows=N, cols=D) - - # Compute analytical gradient - dpred = l1_loss::backward(pred, y) - - # Grad check - h = 1e-5 - for (i in 1:nrow(pred)) { - for (j in 1:ncol(pred)) { - # Compute numerical derivative - old = as.scalar(pred[i,j]) - pred[i,j] = old - h - lossmh = l1_loss::forward(pred, y) - pred[i,j] = old + h - lossph = l1_loss::forward(pred, y) - pred[i,j] = old # reset W[i,j] - dpred_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh) - } - } -} - -l1_reg = function() { - /* - * Gradient check for the L1 regularization function. 
- */ - print("Grad checking the L1 regularization function.") - - # Generate data - D = 5 # num features - M = 3 # num neurons - lambda = 0.01 - W = rand(rows=D, cols=M) - - # Compute analytical gradient - dW = l1_reg::backward(W, lambda) - - # Grad check - h = 1e-5 - for (i in 1:nrow(W)) { - for (j in 1:ncol(W)) { - # Compute numerical derivative - old = as.scalar(W[i,j]) - W[i,j] = old - h - reg_lossmh = l1_reg::forward(W, lambda) - W[i,j] = old + h - reg_lossph = l1_reg::forward(W, lambda) - W[i,j] = old # reset W[i,j] - dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, - reg_lossph, reg_lossmh) - } - } -} - -l2_loss = function() { - /* - * Gradient check for the L2 loss function. - */ - print("Grad checking the L2 loss function.") - - # Generate data - N = 3 # num examples - D = 2 # num targets - pred = rand(rows=N, cols=D) - y = rand(rows=N, cols=D) - - # Compute analytical gradient - dpred = l2_loss::backward(pred, y) - - # Grad check - h = 1e-5 - for (i in 1:nrow(pred)) { - for (j in 1:ncol(pred)) { - # Compute numerical derivative - old = as.scalar(pred[i,j]) - pred[i,j] = old - h - lossmh = l2_loss::forward(pred, y) - pred[i,j] = old + h - lossph = l2_loss::forward(pred, y) - pred[i,j] = old # reset W[i,j] - dpred_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh) - } - } -} - -l2_reg = function() { - /* - * Gradient check for the L2 regularization function. - */ - print("Grad checking the L2 regularization function.") - - # Generate data - D = 5 # num features - M = 3 # num neurons - lambda = 0.01 - W = rand(rows=D, cols=M) - - # Compute analytical gradient - dW = l2_reg::backward(W, lambda) - - # Grad check - h = 1e-5 - for (i in 1:nrow(W)) { - for (j in 1:ncol(W)) { - # Compute numerical derivative - old = as.scalar(W[i,j]) - W[i,j] = old - h - reg_lossmh = l2_reg::forward(W, lambda) - W[i,j] = old + h - reg_lossph = l2_reg::forward(W, lambda) - W[i,j] = old # reset W[i,j] - dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, - reg_lossph, reg_lossmh) - } - } -} - -log_loss = function() { - /* - * Gradient check for the log loss function. - */ - print("Grad checking the log loss function.") - - # Generate data - N = 20 # num examples - D = 1 # num targets - pred = rand(rows=N, cols=D, min=0, max=1, pdf="uniform") - y = round(rand(rows=N, cols=D, min=0, max=1, pdf="uniform")) - - # Compute analytical gradient - dpred = log_loss::backward(pred, y) - - # Grad check - h = 1e-5 - for (i in 1:nrow(pred)) { - for (j in 1:ncol(pred)) { - # Compute numerical derivative - old = as.scalar(pred[i,j]) - pred[i,j] = old - h - lossmh = log_loss::forward(pred, y) - pred[i,j] = old + h - lossph = log_loss::forward(pred, y) - pred[i,j] = old # reset W[i,j] - dpred_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh) - } - } -} - -lstm = function() { - /* - * Gradient check for the LSTM layer. 
- */ - print("Grad checking the LSTM layer with L2 loss.") - - # Generate data - N = 3 # num examples - D = 10 # num features - T = 15 # num timesteps (sequence length) - M = 5 # num neurons - return_seq = TRUE - X = rand(rows=N, cols=T*D) - y = rand(rows=N, cols=T*M) - yc = rand(rows=N, cols=M) - out0 = rand(rows=N, cols=M) - c0 = rand(rows=N, cols=M) - [W, b, dummy, dummy2] = lstm::init(N, D, M) - - # Compute analytical gradients of loss wrt parameters - [out, c, cache_out, cache_c, cache_ifog] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - dout = l2_loss::backward(out, y) - dc = l2_loss::backward(c, yc) - [dX, dW, db, dout0, dc0] = lstm::backward(dout, dc, X, W, b, T, D, return_seq, out0, c0, - cache_out, cache_c, cache_ifog) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outmh = l2_loss::forward(outmh, y) - loss_cmh = l2_loss::forward(cmh, yc) - lossmh = loss_outmh + loss_cmh - X[i,j] = old + h - [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outph = l2_loss::forward(outph, y) - loss_cph = l2_loss::forward(cph, yc) - lossph = loss_outph + loss_cph - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking W.") - for (i in 1:nrow(W)) { - for (j in 1:ncol(W)) { - # Compute numerical derivative - old = as.scalar(W[i,j]) - W[i,j] = old - h - [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outmh = l2_loss::forward(outmh, y) - loss_cmh = l2_loss::forward(cmh, yc) - lossmh = loss_outmh + loss_cmh - W[i,j] = old + h - [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outph = l2_loss::forward(outph, y) - loss_cph = l2_loss::forward(cph, yc) - lossph = loss_outph + loss_cph - W[i,j] = old # reset - dW_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) - } - } - - print(" - Grad checking b.") - for (i in 1:nrow(b)) { - for (j in 1:ncol(b)) { - # Compute numerical derivative - old = as.scalar(b[i,j]) - b[i,j] = old - h - [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outmh = l2_loss::forward(outmh, y) - loss_cmh = l2_loss::forward(cmh, yc) - lossmh = loss_outmh + loss_cmh - b[i,j] = old + h - [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outph = l2_loss::forward(outph, y) - loss_cph = l2_loss::forward(cph, yc) - lossph = loss_outph + loss_cph - b[i,j] = old # reset - db_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh) - } - } - - print(" - Grad checking out0.") - for (i in 1:nrow(out0)) { - for (j in 1:ncol(out0)) { - # Compute numerical derivative - old = as.scalar(out0[i,j]) - out0[i,j] = old - h - [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outmh = l2_loss::forward(outmh, y) - loss_cmh = l2_loss::forward(cmh, yc) - lossmh = loss_outmh + loss_cmh - out0[i,j] = old + h - [outph, cph, cache, cache, 
cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outph = l2_loss::forward(outph, y) - loss_cph = l2_loss::forward(cph, yc) - lossph = loss_outph + loss_cph - out0[i,j] = old # reset - dout0_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh) - } - } - - print(" - Grad checking c0.") - for (i in 1:nrow(c0)) { - for (j in 1:ncol(c0)) { - # Compute numerical derivative - old = as.scalar(c0[i,j]) - c0[i,j] = old - h - [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outmh = l2_loss::forward(outmh, y) - loss_cmh = l2_loss::forward(cmh, yc) - lossmh = loss_outmh + loss_cmh - c0[i,j] = old + h - [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0) - loss_outph = l2_loss::forward(outph, y) - loss_cph = l2_loss::forward(cph, yc) - lossph = loss_outph + loss_cph - c0[i,j] = old # reset - dc0_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh) - } - } -} - -max_pool2d = function() { - /* - * Gradient check for the 2D max pooling layer. - */ - print("Grad checking the 2D max pooling layer with L2 loss.") - - # Generate data - N = 2 # num examples - C = 2 # num channels - Hin = 4 # input height - Win = 4 # input width - Hf = 2 # pool filter height - Wf = 2 # pool filter width - stride = 2 - X = rand(rows=N, cols=C*Hin*Win) - - for (pad in 0:1) { - print(" - Grad checking w/ pad="+pad+".") - Hout = as.integer(floor((Hin + 2*pad - Hf)/stride + 1)) - Wout = as.integer(floor((Win + 2*pad - Wf)/stride + 1)) - y = rand(rows=N, cols=C*Hout*Wout) - - # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - dout = l2_loss::backward(out, y) - dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - - # Grad check - h = 1e-5 - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - } -} - -max_pool2d_builtin = function() { - /* - * Gradient check for the 2D max pooling layer. 
- */ - print("Grad checking the built-in 2D max pooling layer with L2 loss.") - - # Generate data - N = 2 # num examples - C = 2 # num channels - Hin = 4 # input height - Win = 4 # input width - Hf = 2 # pool filter height - Wf = 2 # pool filter width - stride = 2 - X = rand(rows=N, cols=C*Hin*Win) - - for (pad in 0:1) { - print(" - Grad checking w/ pad="+pad+".") - Hout = as.integer(floor((Hin + 2 * pad - Hf) / stride + 1)) - Wout = as.integer(floor((Win + 2 * pad - Wf) / stride + 1)) - y = rand(rows=N, cols=C*Hout*Wout) - - # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - dout = l2_loss::backward(out, y) - dX = max_pool2d_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - - # Grad check - h = 1e-5 - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - } -} - -max_pool2d_simple = function() { - /* - * Gradient check for the simple reference 2D max pooling layer. - */ - print("Grad checking the simple reference 2D max pooling layer with L2 loss.") - - # Generate data - N = 2 # num examples - C = 2 # num channels - Hin = 4 # input height - Win = 4 # input width - Hf = 2 # pool filter height - Wf = 2 # pool filter width - stride = 2 - X = rand(rows=N, cols=C*Hin*Win) - - for (pad in 0:1) { - print(" - Grad checking w/ pad="+pad+".") - Hout = as.integer(floor((Hin + 2*pad - Hf)/stride + 1)) - Wout = as.integer(floor((Win + 2*pad - Wf)/stride + 1)) - y = rand(rows=N, cols=C*Hout*Wout) - - # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - dout = l2_loss::backward(out, y) - dX = max_pool2d_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - - # Grad check - h = 1e-5 - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - } -} - -relu = function() { - /* - * Gradient check for the ReLU nonlinearity layer. - * - * NOTE: This could result in a false-negative in which the test - * fails due to a kink being crossed in the nonlinearity. This - * occurs when the tests, f(x-h) and f(x+h), end up on opposite - * sides of the zero threshold of max(0, fx). For now, just run - * the tests again. In the future, we can explicitly check for - * this and rerun the test automatically. 
- */ - print("Grad checking the ReLU nonlinearity layer with L2 loss.") - - # Generate data - N = 3 # num examples - M = 10 # num neurons - X = rand(rows=N, cols=M, min=-5, max=5) - y = rand(rows=N, cols=M) - - # Compute analytical gradients of loss wrt parameters - out = relu::forward(X) - dout = l2_loss::backward(out, y) - dX = relu::backward(dout, X) - - # Grad check - h = 1e-5 - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - outmh = relu::forward(X) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - outph = relu::forward(X) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } -} - -rnn = function() { - /* - * Gradient check for the simple RNN layer. - */ - print("Grad checking the simple RNN layer with L2 loss.") - - # Generate data - N = 3 # num examples - D = 10 # num features - T = 15 # num timesteps (sequence length) - M = 5 # num neurons - return_seq = TRUE - X = rand(rows=N, cols=T*D) - y = rand(rows=N, cols=T*M) - out0 = rand(rows=N, cols=M) - [W, b, dummy] = rnn::init(N, D, M) - - # Compute analytical gradients of loss wrt parameters - [out, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0) - dout = l2_loss::backward(out, y) - [dX, dW, db, dout0] = rnn::backward(dout, X, W, b, T, D, return_seq, out0, cache_out) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - [outmh, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking W.") - for (i in 1:nrow(W)) { - for (j in 1:ncol(W)) { - # Compute numerical derivative - old = as.scalar(W[i,j]) - W[i,j] = old - h - [outmh, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0) - lossmh = l2_loss::forward(outmh, y) - W[i,j] = old + h - [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0) - lossph = l2_loss::forward(outph, y) - W[i,j] = old # reset - dW_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) - } - } - - print(" - Grad checking b.") - for (i in 1:nrow(b)) { - for (j in 1:ncol(b)) { - # Compute numerical derivative - old = as.scalar(b[i,j]) - b[i,j] = old - h - [outmh, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0) - lossmh = l2_loss::forward(outmh, y) - b[i,j] = old + h - [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0) - lossph = l2_loss::forward(outph, y) - b[i,j] = old # reset - db_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh) - } - } - - print(" - Grad checking out0.") - for (i in 1:nrow(out0)) { - for (j in 1:ncol(out0)) { - # Compute numerical derivative - old = as.scalar(out0[i,j]) - out0[i,j] = old - h - [outmh, cache_out] = rnn::forward(X, W, b, T, D, 
return_seq, out0) - lossmh = l2_loss::forward(outmh, y) - out0[i,j] = old + h - [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0) - lossph = l2_loss::forward(outph, y) - out0[i,j] = old # reset - dout0_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh) - } - } -} - -scale_shift1d = function() { - /* - * Gradient check for the 1D scale & shift layer. - */ - print("Grad checking the 1D scale & shift layer with L2 loss.") - - # Generate data - N = 3 # num examples - D = 100 # num features - X = rand(rows=N, cols=D) - y = rand(rows=N, cols=D) - [gamma, beta] = scale_shift1d::init(D) - - # Compute analytical gradients of loss wrt parameters - out = scale_shift1d::forward(X, gamma, beta) - dout = l2_loss::backward(out, y) - [dX, dgamma, dbeta] = scale_shift1d::backward(dout, out, X, gamma, beta) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - outmh = scale_shift1d::forward(X, gamma, beta) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - outph = scale_shift1d::forward(X, gamma, beta) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking gamma.") - for (i in 1:nrow(gamma)) { - for (j in 1:ncol(gamma)) { - # Compute numerical derivative - old = as.scalar(gamma[i,j]) - gamma[i,j] = old - h - outmh = scale_shift1d::forward(X, gamma, beta) - lossmh = l2_loss::forward(outmh, y) - gamma[i,j] = old + h - outph = scale_shift1d::forward(X, gamma, beta) - lossph = l2_loss::forward(outph, y) - gamma[i,j] = old # reset - dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num, - lossph, lossmh) - } - } - - print(" - Grad checking beta.") - for (i in 1:nrow(beta)) { - for (j in 1:ncol(beta)) { - # Compute numerical derivative - old = as.scalar(beta[i,j]) - beta[i,j] = old - h - outmh = scale_shift1d::forward(X, gamma, beta) - lossmh = l2_loss::forward(outmh, y) - beta[i,j] = old + h - outph = scale_shift1d::forward(X, gamma, beta) - lossph = l2_loss::forward(outph, y) - beta[i,j] = old # reset - dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num, - lossph, lossmh) - } - } -} - -scale_shift2d = function() { - /* - * Gradient check for the 2D scale & shift layer. 
- */ - print("Grad checking the 2D scale & shift layer with L2 loss.") - - # Generate data - N = 3 # num examples - C = 2 # num channels - Hin = 5 # input height - Win = 5 # input width - X = rand(rows=N, cols=C*Hin*Win) - y = rand(rows=N, cols=C*Hin*Win) - [gamma, beta] = scale_shift2d::init(C) - - # Compute analytical gradients of loss wrt parameters - out = scale_shift2d::forward(X, gamma, beta, C, Hin, Win) - dout = l2_loss::backward(out, y) - [dX, dgamma, dbeta] = scale_shift2d::backward(dout, out, X, gamma, beta, C, Hin, Win) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - outmh = scale_shift2d::forward(X, gamma, beta, C, Hin, Win) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - outph = scale_shift2d::forward(X, gamma, beta, C, Hin, Win) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking gamma.") - for (i in 1:nrow(gamma)) { - for (j in 1:ncol(gamma)) { - # Compute numerical derivative - old = as.scalar(gamma[i,j]) - gamma[i,j] = old - h - outmh = scale_shift2d::forward(X, gamma, beta, C, Hin, Win) - lossmh = l2_loss::forward(outmh, y) - gamma[i,j] = old + h - outph = scale_shift2d::forward(X, gamma, beta, C, Hin, Win) - lossph = l2_loss::forward(outph, y) - gamma[i,j] = old # reset - dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num, - lossph, lossmh) - } - } - - print(" - Grad checking beta.") - for (i in 1:nrow(beta)) { - for (j in 1:ncol(beta)) { - # Compute numerical derivative - old = as.scalar(beta[i,j]) - beta[i,j] = old - h - outmh = scale_shift2d::forward(X, gamma, beta, C, Hin, Win) - lossmh = l2_loss::forward(outmh, y) - beta[i,j] = old + h - outph = scale_shift2d::forward(X, gamma, beta, C, Hin, Win) - lossph = l2_loss::forward(outph, y) - beta[i,j] = old # reset - dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num, - lossph, lossmh) - } - } -} - -sigmoid = function() { - /* - * Gradient check for the sigmoid nonlinearity layer. - */ - print("Grad checking the sigmoid nonlinearity layer with L2 loss.") - - # Generate data - N = 3 # num examples - M = 10 # num neurons - X = rand(rows=N, cols=M) - y = rand(rows=N, cols=M) - - # Compute analytical gradients of loss wrt parameters - out = sigmoid::forward(X) - dout = l2_loss::backward(out, y) - dX = sigmoid::backward(dout, X) - - # Grad check - h = 1e-5 - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - outmh = sigmoid::forward(X) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - outph = sigmoid::forward(X) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } -} - -softmax = function() { - /* - * Gradient check for the softmax layer. 
- */ - print("Grad checking the softmax layer with L2 loss.") - - # Generate data - N = 3 # num examples - D = 10 # num classes - X = rand(rows=N, cols=D) - y = rand(rows=N, cols=D, min=0, max=1, pdf="uniform") - y = y / rowSums(y) - - # Compute analytical gradients of loss wrt parameters - out = softmax::forward(X) - dout = l2_loss::backward(out, y) - dX = softmax::backward(dout, X) - - # Grad check - h = 1e-5 - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - outmh = softmax::forward(X) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - outph = softmax::forward(X) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } -} - -tanh = function() { - /* - * Gradient check for the hyperbolic tangent (tanh) nonlinearity - * layer. - */ - print("Grad checking the tanh nonlinearity layer with L2 loss.") - - # Generate data - N = 3 # num examples - M = 10 # num neurons - X = rand(rows=N, cols=M) - y = rand(rows=N, cols=M) - - # Compute analytical gradients of loss wrt parameters - out = tanh::forward(X) - dout = l2_loss::backward(out, y) - dX = tanh::backward(dout, X) - - # Grad check - h = 1e-5 - for (i in 1:nrow(X)) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old = as.scalar(X[i,j]) - X[i,j] = old - h - outmh = tanh::forward(X) - lossmh = l2_loss::forward(outmh, y) - X[i,j] = old + h - outph = tanh::forward(X) - lossph = l2_loss::forward(outph, y) - X[i,j] = old # reset - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } -} - -two_layer_affine_l2_net = function() { - /* - * Gradient check for a two-layer, fully-connected, feed-forward - * network with ReLU nonlinearity and L2 loss. - * - * NOTE: This could result in a false-negative in which the test - * fails due to a kink being crossed in the ReLU nonlinearity. This - * occurs when the tests, f(x-h) and f(x+h), end up on opposite - * sides of the zero threshold of max(0, fx). For now, just run - * the tests again. In the future, we can explicitly check for - * this and rerun the test automatically. - */ - print("Grad checking a two-layer, fully-connected, feed-forward network with a ReLU " + - "nonlinearity, and an L2 loss function.") - - # Generate input data - N = 1000 # num examples - D = 100 # num features - yD = 5 # num targets - X = rand(rows=N, cols=D, pdf="normal") - y = rand(rows=N, cols=yD) - - # Create 2-layer, fully-connected network - M = 10 # number of hidden neurons - [W1, b1] = affine::init(D, M) - [W2, b2] = affine::init(M, yD) - - # Optimize for short "burn-in" time to move to characteristic - # mode of operation and unmask any real issues. 
- print(" - Burn-in:") - lr = 0.0001 - decay = 0.99 - for(i in 1:5) { - # Compute forward and backward passes of net - [pred, loss, dX, dW1, db1, dW2, db2] = two_layer_affine_l2_net_run(X, y, W1, b1, W2, b2) - print(" - L2 loss: " + loss) - - # Optimize with basic SGD - W1 = W1 - lr * dW1 - b1 = b1 - lr * db1 - W2 = W2 - lr * dW2 - b2 = b2 - lr * db2 - lr = lr * decay - } - - # Compute analytical gradients - [pred, loss, dX, dW1, db1, dW2, db2] = two_layer_affine_l2_net_run(X, y, W1, b1, W2, b2) - - # Grad check - h = 1e-5 - print(" - Grad checking X.") - for (i in 1:2) { - for (j in 1:ncol(X)) { - # Compute numerical derivative - old_x = as.scalar(X[i,j]) - X[i,j] = old_x - h - [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - X[i,j] = old_x + h - [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - X[i,j] = old_x # reset X[i,j] - dX_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) - } - } - - print(" - Grad checking W1.") - for (i in 1:nrow(W1)) { - for (j in 1:ncol(W1)) { - # Compute numerical derivative - old_w = as.scalar(W1[i,j]) - W1[i,j] = old_w - h - [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - W1[i,j] = old_w + h - [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - W1[i,j] = old_w # reset W[i,j] - dWij_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh) - } - } - - print(" - Grad checking W2.") - for (i in 1:nrow(W2)) { - for (j in 1:ncol(W2)) { - # Compute numerical derivative - old_w = as.scalar(W2[i,j]) - W2[i,j] = old_w - h - [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - W2[i,j] = old_w + h - [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - W2[i,j] = old_w # reset W[i,j] - dWij_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh) - } - } - - print(" - Grad checking b1.") - for (i in 1:nrow(b1)) { - for (j in 1:ncol(b1)) { - # Compute numerical derivative - old_b = as.scalar(b1[i,j]) - b1[i,j] = old_b - h - [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - b1[i,j] = old_b + h - [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - b1[i,j] = old_b # reset b[1,j] - dbij_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh) - } - } - - print(" - Grad checking b2.") - for (i in 1:nrow(b2)) { - for (j in 1:ncol(b2)) { - # Compute numerical derivative - old_b = as.scalar(b2[i,j]) - b2[i,j] = old_b - h - [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - b2[i,j] = old_b + h - [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - b2[i,j] = old_b # reset b[1,j] - dbij_num = (lossph-lossmh) / (2*h) # numerical derivative - - # Check error - rel_error = test_util::check_rel_grad_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh) - } - } -} - -/* - * Test network with forward/backward functions. 
- */ -two_layer_affine_l2_net_run = function(matrix[double] X, matrix[double] y, - matrix[double] W1, matrix[double] b1, - matrix[double] W2, matrix[double] b2) - return (matrix[double] pred, double loss, - matrix[double] dX, - matrix[double] dW1, matrix[double] db1, - matrix[double] dW2, matrix[double] db2) { - # Compute forward pass - [loss, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) - - # Compute backward pass - [dX, dpred, daout, dhout, dW1, db1, dW2, db2] = - two_layer_affine_l2_net_backward(X, y, pred, aout, hout, W1, b1, W2, b2) -} - -two_layer_affine_l2_net_forward = function(matrix[double] X, matrix[double] y, - matrix[double] W1, matrix[double] b1, - matrix[double] W2, matrix[double] b2) - return (double loss, matrix[double] pred, matrix[double] aout, matrix[double] hout) { - # Compute forward pass - hout = affine::forward(X, W1, b1) - aout = relu::forward(hout) - pred = affine::forward(aout, W2, b2) - - # Compute loss - loss = l2_loss::forward(pred, y) -} - -two_layer_affine_l2_net_backward = function(matrix[double] X, matrix[double] y, matrix[double] pred, - matrix[double] aout, matrix[double] hout, - matrix[double] W1, matrix[double] b1, - matrix[double] W2, matrix[double] b2) - return (matrix[double] dX, matrix[double] dpred, - matrix[double] daout, matrix[double] dhout, - matrix[double] dW1, matrix[double] db1, matrix[double] dW2, matrix[double] db2) { - # Compute backward pass - dpred = l2_loss::backward(pred, y) - [daout, dW2, db2] = affine::backward(dpred, aout, W2, b2) - dhout = relu::backward(daout, hout) - [dX, dW1, db1] = affine::backward(dhout, X, W1, b1) -} -
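Every check in grad_check.dml above follows the same central-difference pattern: perturb one entry of an input or parameter by +/- h, re-run the layer's forward pass and the loss, and compare the numerical derivative (lossph - lossmh) / (2*h) against the analytical gradient from the layer's backward pass. The two-sided difference is preferred because its truncation error is O(h^2) rather than the O(h) of a one-sided difference. The stand-alone DML sketch below shows the pattern in isolation; the matrix names and the inline L2-style loss are illustrative only, whereas the real checks call the layer forward/backward functions and report errors through nn/test/util.dml's check_rel_grad_error.

# Minimal, self-contained sketch of the gradient-check loop (illustrative only)
N = 3                      # num examples
D = 4                      # num features
X = rand(rows=N, cols=D)
y = rand(rows=N, cols=D)
h = 1e-5                   # perturbation size

# Analytical gradient of loss = sum((X-y)^2) / (2*N) wrt X
dX = (X - y) / N

for (i in 1:nrow(X)) {
  for (j in 1:ncol(X)) {
    old = as.scalar(X[i,j])
    X[i,j] = old - h
    lossmh = sum((X - y)^2) / (2*N)
    X[i,j] = old + h
    lossph = sum((X - y)^2) / (2*N)
    X[i,j] = old  # reset
    dX_num = (lossph - lossmh) / (2*h)  # central difference
    # Simple relative-error measure; the actual thresholds and warnings
    # are implemented in nn/test/util.dml::check_rel_grad_error.
    rel_error = abs(as.scalar(dX[i,j]) - dX_num) / max(1e-8, abs(as.scalar(dX[i,j])) + abs(dX_num))
  }
}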
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml deleted file mode 100644 index 188bd6e..0000000 --- a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml +++ /dev/null @@ -1,172 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -/* - * Max Pooling layer. - * - * This implementation is intended to be a simple, reference version. - */ - -forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] out, int Hout, int Wout) { - /* - * Computes the forward pass for a 2D spatial max pooling layer. - * The input data has N examples, each represented as a 3D volume - * unrolled into a single vector. - * - * This implementation is intended to be a simple, reference version. - * - * Inputs: - * - X: Inputs, of shape (N, C*Hin*Win). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * A typical value is 0. - * - padw: Padding for left and right sides. - * A typical value is 0. - * - * Outputs: - * - out: Outputs, of shape (N, C*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. 
- */ - N = nrow(X) - Hout = as.integer(floor((Hin + 2*padh - Hf)/strideh + 1)) - Wout = as.integer(floor((Win + 2*padw - Wf)/stridew + 1)) - - # Create output volume - out = matrix(0, rows=N, cols=C*Hout*Wout) - - # Max pooling - parfor (n in 1:N, check=0) { # all examples - Xn = matrix(X[n,], rows=C, cols=Hin*Win) - - # Pad image - pad_value = -1/0 - Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros - parfor (c in 1:C) { - Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped - Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) - Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice - Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape - } - img = Xn_padded # shape (C, (Hin+2*padh)*(Win+2*padw)) - - parfor (c in 1:C, check=0) { # all channels - img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw) - parfor (hout in 1:Hout, check=0) { # all output rows - hin = (hout-1) * strideh + 1 - parfor (wout in 1:Wout, check=0) { # all output columns - win = (wout-1) * stridew + 1 - out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = max(img_slice[hin:hin+Hf-1, - win:win+Wf-1]) - } - } - } - } -} - -backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] dX) { - /* - * Computes the backward pass for a 2D spatial max pooling layer. - * The input data has N examples, each represented as a 3D volume - * unrolled into a single vector. - * - * Inputs: - * - dout: Gradient wrt `out` from upstream, of - * shape (N, C*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. - * - X: Inputs, of shape (N, C*Hin*Win). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * A typical value is 0. - * - padw: Padding for left and right sides. - * A typical value is 0. - * - * Outputs: - * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). 
- */ - N = nrow(X) - - # Create gradient volume - dX = matrix(0, rows=N, cols=C*Hin*Win) - - # Gradient of max pooling - for (n in 1:N) { # all examples - Xn = matrix(X[n,], rows=C, cols=Hin*Win) - - # Pad image - pad_value = -1/0 - Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros - parfor (c in 1:C) { - Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped - Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) - Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice - Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape - } - img = Xn_padded - - dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) - for (c in 1:C) { # all channels - img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw) - dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw) - for (hout in 1:Hout, check=0) { # all output rows - hin = (hout-1) * strideh + 1 - for (wout in 1:Wout) { # all output columns - win = (wout-1) * stridew + 1 - img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1] - max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix - # gradient passes through only for the max value(s) in this patch - dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] - dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1] - + dimg_slice_patch - } - } - dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) - } - - # Unpad derivs on input - dXn = matrix(0, rows=C, cols=Hin*Win) - parfor (c in 1:C, check=0) { - dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw)) - dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] - dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win) - } - dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) - } -} - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/test/run_tests.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/run_tests.dml b/scripts/staging/SystemML-NN/nn/test/run_tests.dml deleted file mode 100644 index d8173a9..0000000 --- a/scripts/staging/SystemML-NN/nn/test/run_tests.dml +++ /dev/null @@ -1,90 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -/* - * Script to run tests. 
- */ -source("nn/test/grad_check.dml") as grad_check -source("nn/test/test.dml") as test - -print("") -print("Starting grad checks.") -print("---") - -# Loss & loss-related functions -grad_check::cross_entropy_loss() -grad_check::l1_loss() -grad_check::l1_reg() -grad_check::l2_loss() -grad_check::l2_reg() -grad_check::log_loss() -print("") - -# Core layers -grad_check::affine() -grad_check::batch_norm1d() -grad_check::batch_norm2d() -grad_check::conv2d() -grad_check::conv2d_builtin() -grad_check::conv2d_simple() -grad_check::dropout() -grad_check::lstm() -grad_check::max_pool2d() -grad_check::max_pool2d_builtin() -grad_check::max_pool2d_simple() -grad_check::relu() -grad_check::rnn() -grad_check::scale_shift1d() -grad_check::scale_shift2d() -grad_check::sigmoid() -grad_check::softmax() -grad_check::tanh() -print("") - -# Example model -grad_check::two_layer_affine_l2_net() -print("") - -print("---") -print("Grad checks complete -- look for any ERRORs or WARNINGs.") -print("If any tests involving ReLUs failed, try a few times " + - "to ensure that they were not false negatives due to " + - "kinks being crossed.") -print("") - -print("") -print("Starting other tests.") -print("---") - -test::batch_norm1d() -test::batch_norm2d() -test::conv2d() -test::cross_entropy_loss() -test::im2col() -test::max_pool2d() -test::padding() -test::tanh() - -print("---") -print("Other tests complete -- look for any ERRORs or WARNINGs.") -print("") -print("") -
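For reference, run_tests.dml above simply sources grad_check.dml and test.dml and invokes each check in turn; a single layer can be checked in isolation with a tiny driver script of the same shape (hypothetical file, shown only as a usage sketch):

# check_affine.dml -- hypothetical one-off driver, mirroring run_tests.dml
source("nn/test/grad_check.dml") as grad_check
grad_check::affine()

Such a script, like run_tests.dml itself, is expected to be run from the SystemML-NN directory so that the source("nn/...") paths resolve, e.g. by passing it to SystemML via spark-submit with the -f option.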