Repository: systemml Updated Branches: refs/heads/master bf4ba16b9 -> 912b47018
[SYSTEMML-540] Improve the performance of LSTM forward on GPU - This commit improves the performance of LSTM forward by reducing unnecessary ping pongs between CPU-GPU due to left indexing. - There are no performance gains for CPU execution. Closes #756. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/912b4701 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/912b4701 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/912b4701 Branch: refs/heads/master Commit: 912b4701875d4de0db8327479398c32607f4687d Parents: bf4ba16 Author: Niketan Pansare <npan...@us.ibm.com> Authored: Sat Nov 3 05:52:00 2018 +0530 Committer: Niketan Pansare <npan...@us.ibm.com> Committed: Sat Nov 3 05:52:00 2018 +0530 ---------------------------------------------------------------------- scripts/nn/layers/lstm.dml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/912b4701/scripts/nn/layers/lstm.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml index 0b0016b..cd1557d 100644 --- a/scripts/nn/layers/lstm.dml +++ b/scripts/nn/layers/lstm.dml @@ -89,13 +89,13 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int T, for (t in 1:T) { # each timestep X_t = X[,(t-1)*D+1:t*D] # shape (N, D) input = cbind(X_t, out_prev) # shape (N, D+M) - ifog = input %*% W + b # input, forget, output, and g gates; shape (N, 4M) - ifog[,1:3*M] = sigmoid::forward(ifog[,1:3*M]) # i,f,o gates squashed with sigmoid - ifog[,3*M+1:4*M] = tanh::forward(ifog[,3*M+1:4*M]) # g gate squashed with tanh + ifog_raw = input %*% W + b # input, forget, output, and g gates; shape (N, 4M) + ifo = sigmoid::forward(ifog_raw[,1:3*M]) # i,f,o gates squashed with sigmoid + g = tanh::forward(ifog_raw[,3*M+1:4*M]) # g gate squashed with tanh # c_t = f*prev_c + i*g - c = ifog[,M+1:2*M]*c_prev + ifog[,1:M]*ifog[,3*M+1:4*M] # shape (N, M) + c = ifo[,M+1:2*M]*c_prev + ifo[,1:M]*g # shape (N, M) # out_t = o*tanh(c) - out_t = ifog[,2*M+1:3*M] * tanh::forward(c) # shape (N, M) + out_t = ifo[,2*M+1:3*M] * tanh::forward(c) # shape (N, M) # store if (return_sequences) { @@ -108,7 +108,7 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int T, c_prev = c cache_out[t,] = matrix(out_t, rows=1, cols=N*M) # reshape cache_c[t,] = matrix(c, rows=1, cols=N*M) # reshape - cache_ifog[t,] = matrix(ifog, rows=1, cols=N*4*M) # reshape + cache_ifog[t,] = matrix(cbind(ifo, g), rows=1, cols=N*4*M) # reshape } }