Repository: systemml
Updated Branches:
  refs/heads/master bf4ba16b9 -> 912b47018


[SYSTEMML-540] Improve the performance of LSTM forward on GPU

- This commit improves the performance of LSTM forward by avoiding unnecessary 
CPU-GPU round trips caused by left indexing.
- There are no performance gains for CPU execution.

Closes #756.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/912b4701
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/912b4701
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/912b4701

Branch: refs/heads/master
Commit: 912b4701875d4de0db8327479398c32607f4687d
Parents: bf4ba16
Author: Niketan Pansare <npan...@us.ibm.com>
Authored: Sat Nov 3 05:52:00 2018 +0530
Committer: Niketan Pansare <npan...@us.ibm.com>
Committed: Sat Nov 3 05:52:00 2018 +0530

----------------------------------------------------------------------
 scripts/nn/layers/lstm.dml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/912b4701/scripts/nn/layers/lstm.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml
index 0b0016b..cd1557d 100644
--- a/scripts/nn/layers/lstm.dml
+++ b/scripts/nn/layers/lstm.dml
@@ -89,13 +89,13 @@ forward = function(matrix[double] X, matrix[double] W, 
matrix[double] b, int T,
   for (t in 1:T) {  # each timestep
     X_t = X[,(t-1)*D+1:t*D]  # shape (N, D)
     input = cbind(X_t, out_prev)  # shape (N, D+M)
-    ifog = input %*% W + b  # input, forget, output, and g gates; shape (N, 4M)
-    ifog[,1:3*M] = sigmoid::forward(ifog[,1:3*M])  # i,f,o gates squashed with 
sigmoid
-    ifog[,3*M+1:4*M] = tanh::forward(ifog[,3*M+1:4*M])  # g gate squashed with 
tanh
+    ifog_raw = input %*% W + b  # input, forget, output, and g gates; shape 
(N, 4M)
+    ifo = sigmoid::forward(ifog_raw[,1:3*M])  # i,f,o gates squashed with 
sigmoid
+    g = tanh::forward(ifog_raw[,3*M+1:4*M])  # g gate squashed with tanh
     # c_t = f*prev_c + i*g
-    c = ifog[,M+1:2*M]*c_prev + ifog[,1:M]*ifog[,3*M+1:4*M]  # shape (N, M)
+    c = ifo[,M+1:2*M]*c_prev + ifo[,1:M]*g  # shape (N, M)
     # out_t = o*tanh(c)
-    out_t = ifog[,2*M+1:3*M] * tanh::forward(c)  # shape (N, M)
+    out_t = ifo[,2*M+1:3*M] * tanh::forward(c)  # shape (N, M)
 
     # store
     if (return_sequences) {
@@ -108,7 +108,7 @@ forward = function(matrix[double] X, matrix[double] W, 
matrix[double] b, int T,
     c_prev = c
     cache_out[t,] = matrix(out_t, rows=1, cols=N*M)  # reshape
     cache_c[t,] = matrix(c, rows=1, cols=N*M)  # reshape
-    cache_ifog[t,] = matrix(ifog, rows=1, cols=N*4*M)  # reshape
+    cache_ifog[t,] = matrix(cbind(ifo, g), rows=1, cols=N*4*M)  # reshape
   }
 }
 

Reply via email to