This is an automated email from the ASF dual-hosted git repository.

qkou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
     new 5c8981a  [R] Initializer fix and adjustments to RNN API (#8121)
5c8981a is described below

commit 5c8981a740d7e05afb664f2228b85eb22a6d1666
Author: jeremiedb <jeremi...@users.noreply.github.com>
AuthorDate: Fri Nov 10 21:24:25 2017 -0500

    [R] Initializer fix and adjustments to RNN API (#8121)
---
 R-package/R/gru.R         | 355 ------------------------------------------
 R-package/R/initializer.R |  32 ++--
 R-package/R/lstm.R        | 388 ----------------------------------------------
 R-package/R/rnn.graph.R   | 160 ++++++++++++-------
 R-package/R/viz.graph.R   |   9 +-
 5 files changed, 125 insertions(+), 819 deletions(-)

diff --git a/R-package/R/gru.R b/R-package/R/gru.R
deleted file mode 100644
index d2ffd9a..0000000
--- a/R-package/R/gru.R
+++ /dev/null
@@ -1,355 +0,0 @@
-# gru cell symbol
-gru <- function(num.hidden, indata, prev.state, param, seqidx, layeridx, dropout=0) {
-    if (dropout > 0)
-        indata <- mx.symbol.Dropout(data=indata, p=dropout)
-    i2h <- mx.symbol.FullyConnected(data=indata,
-                                    weight=param$gates.i2h.weight,
-                                    bias=param$gates.i2h.bias,
-                                    num.hidden=num.hidden * 2,
-                                    name=paste0("t", seqidx, ".l", layeridx, ".gates.i2h"))
-    h2h <- mx.symbol.FullyConnected(data=prev.state$h,
-                                    weight=param$gates.h2h.weight,
-                                    bias=param$gates.h2h.bias,
-                                    num.hidden=num.hidden * 2,
-                                    name=paste0("t", seqidx, ".l", layeridx, ".gates.h2h"))
-    gates <- i2h + h2h
-    slice.gates <- mx.symbol.SliceChannel(gates, num.outputs=2,
-                                          name=paste0("t", seqidx, ".l", layeridx, ".slice"))
-    update.gate <- mx.symbol.Activation(slice.gates[[1]], act.type="sigmoid")
-    reset.gate <- mx.symbol.Activation(slice.gates[[2]], act.type="sigmoid")
-
-    htrans.i2h <- mx.symbol.FullyConnected(data=indata,
-                                           weight=param$trans.i2h.weight,
-                                           bias=param$trans.i2h.bias,
-                                           num.hidden=num.hidden,
-                                           name=paste0("t", seqidx, ".l", layeridx, ".trans.i2h"))
-    h.after.reset <- prev.state$h * reset.gate
-    htrans.h2h <- mx.symbol.FullyConnected(data=h.after.reset,
-                                           weight=param$trans.h2h.weight,
-                                           bias=param$trans.h2h.bias,
-                                           num.hidden=num.hidden,
-                                           name=paste0("t", seqidx, ".l", layeridx, ".trans.h2h"))
-    h.trans <- htrans.i2h + htrans.h2h
-    h.trans.active <- mx.symbol.Activation(h.trans, act.type="tanh")
-    next.h <- prev.state$h + update.gate * (h.trans.active - prev.state$h)
-    return (list(h=next.h))
-}
-
-# unrolled gru network
-gru.unroll <- function(num.gru.layer, seq.len, input.size,
-                       num.hidden, num.embed, num.label, dropout=0) {
-    embed.weight <- mx.symbol.Variable("embed.weight")
-    cls.weight <- mx.symbol.Variable("cls.weight")
-    cls.bias <- mx.symbol.Variable("cls.bias")
-    param.cells <- lapply(1:num.gru.layer, function(i) {
-        cell <- list(gates.i2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.i2h.weight")),
-                     gates.i2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.i2h.bias")),
-                     gates.h2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.h2h.weight")),
-                     gates.h2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.h2h.bias")),
-                     trans.i2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.i2h.weight")),
-                     trans.i2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.i2h.bias")),
-                     trans.h2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.h2h.weight")),
-                     trans.h2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.h2h.bias")))
-        return (cell)
-    })
-    last.states <- lapply(1:num.gru.layer, function(i) {
-        state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h")))
-        return (state)
-    })
-
-    # embeding layer
-    label <- mx.symbol.Variable("label")
-    data <- mx.symbol.Variable("data")
-    embed <- mx.symbol.Embedding(data=data, input.dim=input.size,
-                                 weight=embed.weight, output.dim=num.embed, name='embed')
-    wordvec <- mx.symbol.SliceChannel(data=embed, num.outputs=seq.len, squeeze.axis=1)
-
-    last.hidden <- list()
-    for (seqidx in 1:seq.len) {
-        hidden <- wordvec[[seqidx]]
-        # stack GRU
-        for (i in 1:num.gru.layer) {
-            dp <- ifelse(i==1, 0, dropout)
-            next.state <- gru(num.hidden, indata=hidden,
-                              prev.state=last.states[[i]],
-                              param=param.cells[[i]],
-                              seqidx=seqidx, layeridx=i,
-                              dropout=dp)
-            hidden <- next.state$h
-            last.states[[i]] <- next.state
-        }
-        # decoder
-        if (dropout > 0)
-            hidden <- mx.symbol.Dropout(data=hidden, p=dropout)
-        last.hidden <- c(last.hidden, hidden)
-    }
-    last.hidden$dim <- 0
-    last.hidden$num.args <- seq.len
-    concat <- mxnet:::mx.varg.symbol.Concat(last.hidden)
-    fc <- mx.symbol.FullyConnected(data=concat,
-                                   weight=cls.weight,
-                                   bias=cls.bias,
-                                   num.hidden=num.label)
-
-    label <- mx.symbol.transpose(data=label)
-    label <- mx.symbol.Reshape(data=label, target.shape=c(0))
-
-    loss.all <- mx.symbol.SoftmaxOutput(data=fc, label=label, name="sm")
-    return (loss.all)
-}
-
-# gru inference model symbol
-gru.inference.symbol <- function(num.gru.layer, seq.len, input.size,
-                                 num.hidden, num.embed, num.label, dropout=0) {
-    seqidx <- 1
-    embed.weight <- mx.symbol.Variable("embed.weight")
-    cls.weight <- mx.symbol.Variable("cls.weight")
-    cls.bias <- mx.symbol.Variable("cls.bias")
-
-    param.cells <- lapply(1:num.gru.layer, function(i) {
-        cell <- list(gates.i2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.i2h.weight")),
-                     gates.i2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.i2h.bias")),
-                     gates.h2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.h2h.weight")),
-                     gates.h2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.h2h.bias")),
-                     trans.i2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.i2h.weight")),
-                     trans.i2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.i2h.bias")),
-                     trans.h2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.h2h.weight")),
-                     trans.h2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.h2h.bias")))
-        return (cell)
-    })
-    last.states <- lapply(1:num.gru.layer, function(i) {
-        state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h")))
-        return (state)
-    })
-
-    # embeding layer
-    data <- mx.symbol.Variable("data")
-    hidden <- mx.symbol.Embedding(data=data, input_dim=input.size,
-                                  weight=embed.weight, output_dim=num.embed, name="embed")
-
-    # stack GRU
-    for (i in 1:num.gru.layer) {
-        dp <- ifelse(i==1, 0, dropout)
-        next.state <- gru(num.hidden, indata=hidden,
-                          prev.state=last.states[[i]],
-                          param=param.cells[[i]],
-                          seqidx=seqidx, layeridx=i,
-                          dropout=dp)
-        hidden <- next.state$h
-        last.states[[i]] <- next.state
-    }
-    # decoder
-    if (dropout > 0)
-        hidden <- mx.symbol.Dropout(data=hidden, p=dropout)
-
-    fc <- mx.symbol.FullyConnected(data=hidden, num_hidden=num.label,
-                                   weight=cls.weight, bias=cls.bias, name='pred')
-    sm <- mx.symbol.SoftmaxOutput(data=fc, name='sm')
-    unpack.h <- lapply(1:num.gru.layer, function(i) {
-        state <- last.states[[i]]
-        state.h <- mx.symbol.BlockGrad(state$h, name=paste0("l", i, ".last.h"))
-        return (state.h)
-    })
-
-    list.all <- c(sm, unpack.h)
-    return (mx.symbol.Group(list.all))
-}
-
-#' Training GRU Unrolled Model
-#'
-#' @param train.data mx.io.DataIter or list(data=R.array, label=R.array)
-#'     The Training set.
-#' @param eval.data mx.io.DataIter or list(data=R.array, label=R.array), optional
-#'     The validation set used for validation evaluation during the progress.
-#' @param num.gru.layer integer
-#'     The number of the layer of gru.
-#' @param seq.len integer
-#'     The length of the input sequence.
-#' @param num.hidden integer
-#'     The number of hidden nodes.
-#' @param num.embed integer
-#'     The output dim of embedding.
-#' @param num.label integer
-#'     The number of labels.
-#' @param batch.size integer
-#'     The batch size used for R array training.
-#' @param input.size integer
-#'     The input dim of one-hot encoding of embedding
-#' @param ctx mx.context, optional
-#'     The device used to perform training.
-#' @param num.round integer, default=10
-#'     The number of iterations over training data to train the model.
-#' @param update.period integer, default=1
-#'     The number of iterations to update parameters during training period.
-#' @param initializer initializer object. default=mx.init.uniform(0.01)
-#'     The initialization scheme for parameters.
-#' @param dropout float, default=0
-#'     A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
-#' @param optimizer string, default="sgd"
-#'     The optimization method.
-#' @param ... other parameters passing to \code{mx.gru}/.
-#' @return model A trained gru unrolled model.
-#'
-#' @export
-mx.gru <- function( train.data, eval.data=NULL,
-                    num.gru.layer, seq.len,
-                    num.hidden, num.embed, num.label,
-                    batch.size, input.size,
-                    ctx=mx.ctx.default(),
-                    num.round=10, update.period=1,
-                    initializer=mx.init.uniform(0.01),
-                    dropout=0, optimizer='sgd',
-                    ...) {
-    # check data and change data into iterator
-    train.data <- check.data(train.data, batch.size, TRUE)
-    eval.data <- check.data(eval.data, batch.size, FALSE)
-
-    # get unrolled gru symbol
-    rnn.sym <- gru.unroll( num.gru.layer=num.gru.layer,
-                           num.hidden=num.hidden,
-                           seq.len=seq.len,
-                           input.size=input.size,
-                           num.embed=num.embed,
-                           num.label=num.label,
-                           dropout=dropout)
-
-    init.states.name <- lapply(1:num.gru.layer, function(i) {
-        state.h <- paste0("l", i, ".init.h")
-        return (state.h)
-    })
-
-    # set up gru model
-    model <- setup.rnn.model(rnn.sym=rnn.sym,
-                             ctx=ctx,
-                             num.rnn.layer=num.gru.layer,
-                             seq.len=seq.len,
-                             num.hidden=num.hidden,
-                             num.embed=num.embed,
-                             num.label=num.label,
-                             batch.size=batch.size,
-                             input.size=input.size,
-                             init.states.name=init.states.name,
-                             initializer=initializer,
-                             dropout=dropout)
-
-    # train gru model
-    model <- train.rnn( model, train.data, eval.data,
-                        num.round=num.round,
-                        update.period=update.period,
-                        ctx=ctx,
-                        init.states.name=init.states.name,
-                        ...)
-    # change model into MXFeedForwardModel
-    model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, aux.params=model$rnn.exec$ref.aux.arrays)
-    return(structure(model, class="MXFeedForwardModel"))
-}
-
-#' Create a GRU Inference Model
-#'
-#' @param num.gru.layer integer
-#'     The number of the layer of gru.
-#' @param input.size integer
-#'     The input dim of one-hot encoding of embedding
-#' @param num.hidden integer
-#'     The number of hidden nodes.
-#' @param num.embed integer
-#'     The output dim of embedding.
-#' @param num.label integer
-#'     The number of labels.
-#' @param batch.size integer, default=1
-#'     The batch size used for R array training.
-#' @param arg.params list
-#'     The batch size used for R array training.
-#' @param ctx mx.context, optional
-#'     Model parameter, list of name to NDArray of net's weights.
-#' @param dropout float, default=0
-#'     A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
-#' @return model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer)
-#'     A gru inference model.
-#'
-#' @export
-mx.gru.inference <- function(num.gru.layer,
-                             input.size,
-                             num.hidden,
-                             num.embed,
-                             num.label,
-                             batch.size=1,
-                             arg.params,
-                             ctx=mx.cpu(),
-                             dropout=0.) {
-    sym <- gru.inference.symbol(num.gru.layer=num.gru.layer,
-                                input.size=input.size,
-                                num.hidden=num.hidden,
-                                num.embed=num.embed,
-                                num.label=num.label,
-                                dropout=dropout)
-
-    init.states.name <- lapply(1:num.gru.layer, function(i) {
-        state.h <- paste0("l", i, ".init.h")
-        return (state.h)
-    })
-
-    seq.len <- 1
-    # set up gru model
-    model <- setup.rnn.model(rnn.sym=sym,
-                             ctx=ctx,
-                             num.rnn.layer=num.gru.layer,
-                             seq.len=seq.len,
-                             num.hidden=num.hidden,
-                             num.embed=num.embed,
-                             num.label=num.label,
-                             batch.size=batch.size,
-                             input.size=input.size,
-                             init.states.name=init.states.name,
-                             initializer=mx.init.uniform(0.01),
-                             dropout=dropout)
-    arg.names <- names(model$rnn.exec$ref.arg.arrays)
-    for (k in names(arg.params)) {
-        if ((k %in% arg.names) && is.param.name(k) ) {
-            rnn.input <- list()
-            rnn.input[[k]] <- arg.params[[k]]
-            mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE)
-        }
-    }
-    init.states <- list()
-    for (i in 1:num.gru.layer) {
-        init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
-    }
-    mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
-
-    return (model)
-}
-
-#' Using forward function to predict in gru inference model
-#'
-#' @param model gru model
-#'     A gru inference model
-#' @param input.data, array.matrix
-#'     The input data for forward function
-#' @param new.seq boolean, default=FALSE
-#'     Whether the input is the start of a new sequence
-#'
-#' @return result A list(prob=prob, model=model) containing the result probability of each label and the model.
-#'
-#' @export
-mx.gru.forward <- function(model, input.data, new.seq=FALSE) {
-    if (new.seq == TRUE) {
-        init.states <- list()
-        for (i in 1:model$num.rnn.layer) {
-            init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
-        }
-        mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
-    }
-    dim(input.data) <- c(model$batch.size)
-    data <- list(data=mx.nd.array(input.data))
-    mx.exec.update.arg.arrays(model$rnn.exec, data, match.name=TRUE)
-    mx.exec.forward(model$rnn.exec, is.train=FALSE)
-    init.states <- list()
-    for (i in 1:model$num.rnn.layer) {
-        init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.h_output")]]
-    }
-    mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
-    prob <- model$rnn.exec$ref.outputs[["sm_output"]]
-    return (list(prob=prob, model=model))
-}
-
diff --git a/R-package/R/initializer.R b/R-package/R/initializer.R
index 7a1ffb2..9f5e75b 100644
--- a/R-package/R/initializer.R
+++ b/R-package/R/initializer.R
@@ -4,11 +4,11 @@
 #' @param shape the shape of the array to be generated.
 #'
 mx.init.internal.default <- function(name, shape, ctx, allow.unknown=FALSE) {
-  if (endsWith(name, "bias")) return (mx.nd.zeros(shape, ctx))
-  if (endsWith(name, "gamma")) return (mx.nd.ones(shape, ctx))
-  if (endsWith(name, "beta")) return (mx.nd.zeros(shape, ctx))
-  if (endsWith(name, "moving_mean")) return (mx.nd.zeros(shape, ctx))
-  if (endsWith(name, "moving_var")) return (mx.nd.ones(shape, ctx))
+  if (endsWith(name, "bias")) return (mx.nd.zeros(shape))
+  if (endsWith(name, "gamma")) return (mx.nd.ones(shape))
+  if (endsWith(name, "beta")) return (mx.nd.zeros(shape))
+  if (endsWith(name, "moving_mean")) return (mx.nd.zeros(shape))
+  if (endsWith(name, "moving_var")) return (mx.nd.ones(shape))
   if (allow.unknown) return(NULL)
   stop(paste("Unkown initialization pattern for ", name))
 }
@@ -21,9 +21,9 @@ mx.init.internal.default <- function(name, shape, ctx, allow.unknown=FALSE) {
 mx.init.uniform <- function(scale) {
   function(name, shape, ctx, allow.unknown=FALSE) {
     if (!endsWith(name, "weight")) {
-      return (mx.init.internal.default(name, shape, ctx, allow.unknown))
+      return (mx.init.internal.default(name = name, shape = shape, allow.unknown = allow.unknown))
     }
-    return (mx.runif(shape, -scale, scale, ctx))
+    return (mx.nd.random.uniform(low = -scale, high = scale, shape = shape))
   }
 }
@@ -35,9 +35,9 @@ mx.init.uniform <- function(scale) {
 mx.init.normal <- function(sd) {
   function(name, shape, ctx, allow.unknown=FALSE) {
     if (!endsWith(name, "weight")) {
-      return (mx.init.internal.default(name, shape, ctx, allow.unknown))
+      return (mx.init.internal.default(name = name, shape = shape, allow.unknown = allow.unknown))
     }
-    return (mx.rnorm(shape, 0, sd, ctx))
+    return (mx.nd.random.normal(loc = 0, scale = sd, shape = shape))
   }
 }
@@ -56,9 +56,9 @@ mx.init.Xavier <- function(rnd_type = "uniform", factor_type = "avg",
                            magnitude = 3){
   function(name, shape, ctx, allow.unknown = FALSE){
     if (!endsWith(name, "weight")) {
-      return (mx.init.internal.default(name, shape, ctx, allow.unknown))
+      return (mx.init.internal.default(name = name, shape = shape, allow.unknown = allow.unknown))
     }
-    
+
     fan_out = shape[length(shape)]
     fan_in = prod(shape[-length(shape)])
     factor_val = 1
@@ -71,13 +71,13 @@ mx.init.Xavier <- function(rnd_type = "uniform", factor_type = "avg",
     } else {
      stop("Not supported factor type. See usage of function mx.init.Xavier")
    }
-    
+
    scale = sqrt(magnitude / factor_val)
-    
+
    if (rnd_type == "uniform"){
-      return(mx.runif(shape, -scale, scale, ctx))
+      return(mx.nd.random.uniform(low = -scale, high = scale, shape = shape))
    } else if (rnd_type == "gaussian"){
-      return(mx.rnorm(shape, 0, scale, ctx))
+      return(mx.nd.random.normal(loc = 0, scale = scale, shape = shape))
    } else {
      stop("Not supported random type. See usage of function mx.init.Xavier")
    }
@@ -92,7 +92,7 @@ mx.init.Xavier <- function(rnd_type = "uniform", factor_type = "avg",
 #' @param ctx mx.context The context of the weights
 #' @param skip.unknown Whether skip the unknown weight types
 #' @export
-mx.init.create <- function(initializer, shape.array, ctx, skip.unknown=TRUE) {
+mx.init.create <- function(initializer, shape.array, ctx=NULL, skip.unknown=TRUE) {
   if (length(shape.array) == 0) return(list())
   names = names(shape.array)
   ret <- lapply(1 : length(names), function(i) {
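As a quick reference for the initializer change above: weight arrays are now drawn with mx.nd.random.uniform / mx.nd.random.normal instead of mx.runif / mx.rnorm, so no device context is needed at initialization time, and the ctx argument of mx.init.create defaults to NULL. A minimal sketch of the resulting usage; the parameter names and shapes below are illustrative, not taken from the commit:

    library(mxnet)

    # Xavier initializer with the defaults shown in the diff above
    init <- mx.init.Xavier(rnd_type = "uniform", factor_type = "avg", magnitude = 3)

    # Hypothetical shapes: a fully connected layer's weight and bias
    shapes <- list(fc1.weight = c(64, 128), fc1.bias = c(128))

    # ctx can now be omitted; weights come from mx.nd.random.uniform,
    # biases fall back to mx.init.internal.default (zeros)
    params <- mx.init.create(init, shapes, skip.unknown = TRUE)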
diff --git a/R-package/R/lstm.R b/R-package/R/lstm.R
deleted file mode 100644
index 6223889..0000000
--- a/R-package/R/lstm.R
+++ /dev/null
@@ -1,388 +0,0 @@
-# lstm cell symbol
-lstm <- function(num.hidden, indata, prev.state, param, seqidx, layeridx, dropout=0) {
-    if (dropout > 0)
-        indata <- mx.symbol.Dropout(data=indata, p=dropout)
-    i2h <- mx.symbol.FullyConnected(data=indata,
-                                    weight=param$i2h.weight,
-                                    bias=param$i2h.bias,
-                                    num.hidden=num.hidden * 4,
-                                    name=paste0("t", seqidx, ".l", layeridx, ".i2h"))
-    h2h <- mx.symbol.FullyConnected(data=prev.state$h,
-                                    weight=param$h2h.weight,
-                                    bias=param$h2h.bias,
-                                    num.hidden=num.hidden * 4,
-                                    name=paste0("t", seqidx, ".l", layeridx, ".h2h"))
-    gates <- i2h + h2h
-    slice.gates <- mx.symbol.SliceChannel(gates, num.outputs=4,
-                                          name=paste0("t", seqidx, ".l", layeridx, ".slice"))
-
-    in.gate <- mx.symbol.Activation(slice.gates[[1]], act.type="sigmoid")
-    in.transform <- mx.symbol.Activation(slice.gates[[2]], act.type="tanh")
-    forget.gate <- mx.symbol.Activation(slice.gates[[3]], act.type="sigmoid")
-    out.gate <- mx.symbol.Activation(slice.gates[[4]], act.type="sigmoid")
-    next.c <- (forget.gate * prev.state$c) + (in.gate * in.transform)
-    next.h <- out.gate * mx.symbol.Activation(next.c, act.type="tanh")
-
-    return (list(c=next.c, h=next.h))
-}
-
-# unrolled lstm network
-lstm.unroll <- function(num.lstm.layer, seq.len, input.size,
-                        num.hidden, num.embed, num.label, dropout=0.)
-{
-
-    embed.weight <- mx.symbol.Variable("embed.weight")
-    cls.weight <- mx.symbol.Variable("cls.weight")
-    cls.bias <- mx.symbol.Variable("cls.bias")
-
-    param.cells <- lapply(1:num.lstm.layer, function(i) {
-        cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")),
-                     i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")),
-                     h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")),
-                     h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias")))
-        return (cell)
-    })
-    last.states <- lapply(1:num.lstm.layer, function(i) {
-        state <- list(c=mx.symbol.Variable(paste0("l", i, ".init.c")),
-                      h=mx.symbol.Variable(paste0("l", i, ".init.h")))
-        return (state)
-    })
-
-    # embeding layer
-    label <- mx.symbol.Variable("label")
-    data <- mx.symbol.Variable("data")
-    embed <- mx.symbol.Embedding(data=data, input_dim=input.size,
-                                 weight=embed.weight, output_dim=num.embed, name="embed")
-    wordvec <- mx.symbol.SliceChannel(data=embed, num_outputs=seq.len, squeeze_axis=1)
-
-    last.hidden <- list()
-    for (seqidx in 1:seq.len) {
-        hidden <- wordvec[[seqidx]]
-        # stack lstm
-        for (i in 1:num.lstm.layer) {
-            dp <- ifelse(i==1, 0, dropout)
-            next.state <- lstm(num.hidden, indata=hidden,
-                               prev.state=last.states[[i]],
-                               param=param.cells[[i]],
-                               seqidx=seqidx, layeridx=i,
-                               dropout=dp)
-            hidden <- next.state$h
-            last.states[[i]] <- next.state
-        }
-        # decoder
-        if (dropout > 0)
-            hidden <- mx.symbol.Dropout(data=hidden, p=dropout)
-        last.hidden <- c(last.hidden, hidden)
-    }
-    last.hidden$dim <- 0
-    last.hidden$num.args <- seq.len
-    concat <- mxnet:::mx.varg.symbol.Concat(last.hidden)
-    fc <- mx.symbol.FullyConnected(data=concat,
-                                   weight=cls.weight,
-                                   bias=cls.bias,
-                                   num.hidden=num.label)
-
-    label <- mx.symbol.transpose(data=label)
-    label <- mx.symbol.Reshape(data=label, target.shape=c(0))
-
-    loss.all <- mx.symbol.SoftmaxOutput(data=fc, label=label, name="sm")
-    return (loss.all)
-}
-
-# lstm inference model symbol
-lstm.inference.symbol <- function(num.lstm.layer, input.size,
-                                  num.hidden, num.embed, num.label, dropout=0.)
-{
-    seqidx <- 0
-    embed.weight <- mx.symbol.Variable("embed.weight")
-    cls.weight <- mx.symbol.Variable("cls.weight")
-    cls.bias <- mx.symbol.Variable("cls.bias")
-
-    param.cells <- lapply(1:num.lstm.layer, function(i) {
-        cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")),
-                     i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")),
-                     h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")),
-                     h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias")))
-        return (cell)
-    })
-    last.states <- lapply(1:num.lstm.layer, function(i) {
-        state <- list(c=mx.symbol.Variable(paste0("l", i, ".init.c")),
-                      h=mx.symbol.Variable(paste0("l", i, ".init.h")))
-        return (state)
-    })
-
-    # embeding layer
-    data <- mx.symbol.Variable("data")
-    hidden <- mx.symbol.Embedding(data=data, input_dim=input.size,
-                                  weight=embed.weight, output_dim=num.embed, name="embed")
-
-    # stack lstm
-    for (i in 1:num.lstm.layer) {
-        dp <- ifelse(i==1, 0, dropout)
-        next.state <- lstm(num.hidden, indata=hidden,
-                           prev.state=last.states[[i]],
-                           param=param.cells[[i]],
-                           seqidx=seqidx, layeridx=i,
-                           dropout=dp)
-        hidden <- next.state$h
-        last.states[[i]] <- next.state
-    }
-    # decoder
-    if (dropout > 0)
-        hidden <- mx.symbol.Dropout(data=hidden, p=dropout)
-
-    fc <- mx.symbol.FullyConnected(data=hidden, num_hidden=num.label,
-                                   weight=cls.weight, bias=cls.bias, name='pred')
-    sm <- mx.symbol.SoftmaxOutput(data=fc, name='sm')
-    unpack.c <- lapply(1:num.lstm.layer, function(i) {
-        state <- last.states[[i]]
-        state.c <- mx.symbol.BlockGrad(state$c, name=paste0("l", i, ".last.c"))
-        return (state.c)
-    })
-    unpack.h <- lapply(1:num.lstm.layer, function(i) {
-        state <- last.states[[i]]
-        state.h <- mx.symbol.BlockGrad(state$h, name=paste0("l", i, ".last.h"))
-        return (state.h)
-    })
-
-    list.all <- c(sm, unpack.c, unpack.h)
-    return (mx.symbol.Group(list.all))
-}
-
-
-
-#' Training LSTM Unrolled Model
-#'
-#' @param train.data mx.io.DataIter or list(data=R.array, label=R.array)
-#'     The Training set.
-#' @param eval.data mx.io.DataIter or list(data=R.array, label=R.array), optional
-#'     The validation set used for validation evaluation during the progress.
-#' @param num.lstm.layer integer
-#'     The number of the layer of lstm.
-#' @param seq.len integer
-#'     The length of the input sequence.
-#' @param num.hidden integer
-#'     The number of hidden nodes.
-#' @param num.embed integer
-#'     The output dim of embedding.
-#' @param num.label integer
-#'     The number of labels.
-#' @param batch.size integer
-#'     The batch size used for R array training.
-#' @param input.size integer
-#'     The input dim of one-hot encoding of embedding
-#' @param ctx mx.context, optional
-#'     The device used to perform training.
-#' @param num.round integer, default=10
-#'     The number of iterations over training data to train the model.
-#' @param update.period integer, default=1
-#'     The number of iterations to update parameters during training period.
-#' @param initializer initializer object. default=mx.init.uniform(0.01)
-#'     The initialization scheme for parameters.
-#' @param dropout float, default=0
-#'     A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
-#' @param optimizer string, default="sgd"
-#'     The optimization method.
-#' @param epoch.end.callback function, optional
-#'     The callback when iteration ends.
-#' @param batch.end.callback function, optional
-#'     The callback when one mini-batch iteration ends.
-#' @param ... other parameters passing to \code{mx.lstm}/.
-#' @return model A trained lstm unrolled model.
-#'
-#' @export
-mx.lstm <- function(train.data, eval.data=NULL,
-                    num.lstm.layer, seq.len,
-                    num.hidden, num.embed, num.label,
-                    batch.size, input.size,
-                    ctx=mx.ctx.default(),
-                    num.round=10, update.period=1,
-                    initializer=mx.init.uniform(0.01),
-                    dropout=0, optimizer='sgd',
-                    epoch.end.callback=NULL, batch.end.callback=NULL,
-                    model,
-                    arg.params,
-                    ...) {
-    # check data and change data into iterator
-    train.data <- check.data(train.data, batch.size, TRUE)
-    eval.data <- check.data(eval.data, batch.size, FALSE)
-
-
-
-    # get unrolled lstm symbol
-    if(missing(model)){
-        rnn.sym <- lstm.unroll(num.lstm.layer=num.lstm.layer,
-                               num.hidden=num.hidden,
-                               seq.len=seq.len,
-                               input.size=input.size,
-                               num.embed=num.embed,
-                               num.label=num.label,
-                               dropout=dropout)
-    } else {
-        rnn.sym=model$symbol
-    }
-
-    init.states.c <- lapply(1:num.lstm.layer, function(i) {
-        state.c <- paste0("l", i, ".init.c")
-        return (state.c)
-    })
-    init.states.h <- lapply(1:num.lstm.layer, function(i) {
-        state.h <- paste0("l", i, ".init.h")
-        return (state.h)
-    })
-    init.states.name <- c(init.states.c, init.states.h)
-
-    # set up lstm model
-    model <- setup.rnn.model(rnn.sym=rnn.sym,
-                             ctx=ctx,
-                             num.rnn.layer=num.lstm.layer,
-                             seq.len=seq.len,
-                             num.hidden=num.hidden,
-                             num.embed=num.embed,
-                             num.label=num.label,
-                             batch.size=batch.size,
-                             input.size=input.size,
-                             init.states.name=init.states.name,
-                             initializer=initializer,
-                             dropout=dropout)
-    # restore states
-    if (!missing(arg.params)){
-        arg.names <- names(model$rnn.exec$ref.arg.arrays)
-        for (k in names(arg.params)) {
-            if ((k %in% arg.names) && is.param.name(k) ) {
-                rnn.input <- list()
-                rnn.input[[k]] <- arg.params[[k]]
-                mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE)
-            }
-        }
-    }
-
-    # train lstm model
-    model <- train.rnn( model, train.data, eval.data,
-                        num.round=num.round,
-                        update.period=update.period,
-                        ctx=ctx,
-                        init.states.name=init.states.name,
-                        epoch.end.callback=epoch.end.callback,
-                        batch.end.callback=batch.end.callback,
-                        ...)
-    # change model into MXFeedForwardModel
-    model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, aux.params=model$rnn.exec$ref.aux.arrays)
-    return(structure(model, class="MXFeedForwardModel"))
-}
-
-
-#' Create a LSTM Inference Model
-#'
-#' @param num.lstm.layer integer
-#'     The number of the layer of lstm.
-#' @param input.size integer
-#'     The input dim of one-hot encoding of embedding
-#' @param num.hidden integer
-#'     The number of hidden nodes.
-#' @param num.embed integer
-#'     The output dim of embedding.
-#' @param num.label integer
-#'     The number of labels.
-#' @param batch.size integer, default=1
-#'     The batch size used for R array training.
-#' @param arg.params list
-#'     The batch size used for R array training.
-#' @param ctx mx.context, optional
-#'     Model parameter, list of name to NDArray of net's weights.
-#' @param dropout float, default=0
-#'     A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
-#' @return model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer)
-#'     A lstm inference model.
-#'
-#' @export
-mx.lstm.inference <- function(num.lstm.layer,
-                              input.size,
-                              num.hidden,
-                              num.embed,
-                              num.label,
-                              batch.size=1,
-                              arg.params,
-                              ctx=mx.cpu(),
-                              dropout=0.)
-{
-    sym <- lstm.inference.symbol(num.lstm.layer=num.lstm.layer,
-                                 input.size=input.size,
-                                 num.hidden=num.hidden,
-                                 num.embed=num.embed,
-                                 num.label=num.label,
-                                 dropout=dropout)
-
-    init.states.c <- lapply(1:num.lstm.layer, function(i) {
-        state.c <- paste0("l", i, ".init.c")
-        return (state.c)
-    })
-    init.states.h <- lapply(1:num.lstm.layer, function(i) {
-        state.h <- paste0("l", i, ".init.h")
-        return (state.h)
-    })
-    init.states.name <- c(init.states.c, init.states.h)
-
-    seq.len <- 1
-    # set up lstm model
-    model <- setup.rnn.model(rnn.sym=sym,
-                             ctx=ctx,
-                             num.rnn.layer=num.lstm.layer,
-                             seq.len=seq.len,
-                             num.hidden=num.hidden,
-                             num.embed=num.embed,
-                             num.label=num.label,
-                             batch.size=batch.size,
-                             input.size=input.size,
-                             init.states.name=init.states.name,
-                             initializer=mx.init.uniform(0.01),
-                             dropout=dropout)
-    arg.names <- names(model$rnn.exec$ref.arg.arrays)
-    for (k in names(arg.params)) {
-        if ((k %in% arg.names) && is.param.name(k) ) {
-            rnn.input <- list()
-            rnn.input[[k]] <- arg.params[[k]]
-            mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE)
-        }
-    }
-    init.states <- list()
-    for (i in 1:num.lstm.layer) {
-        init.states[[paste0("l", i, ".init.c")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0
-        init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
-    }
-    mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
-
-    return (model)
-}
-
-#' Using forward function to predict in lstm inference model
-#'
-#' @param model lstm model
-#'     A Lstm inference model
-#' @param input.data, array.matrix
-#'     The input data for forward function
-#' @param new.seq boolean, default=FALSE
-#'     Whether the input is the start of a new sequence
-#'
-#' @return result A list(prob=prob, model=model) containing the result probability of each label and the model.
-#'
-#' @export
-mx.lstm.forward <- function(model, input.data, new.seq=FALSE) {
-    if (new.seq == TRUE) {
-        init.states <- list()
-        for (i in 1:model$num.rnn.layer) {
-            init.states[[paste0("l", i, ".init.c")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0
-            init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
-        }
-        mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
-    }
-    dim(input.data) <- c(model$batch.size)
-    data <- list(data=mx.nd.array(input.data))
-    mx.exec.update.arg.arrays(model$rnn.exec, data, match.name=TRUE)
-    mx.exec.forward(model$rnn.exec, is.train=FALSE)
-    init.states <- list()
-    for (i in 1:model$num.rnn.layer) {
-        init.states[[paste0("l", i, ".init.c")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.c_output")]]
-        init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.h_output")]]
-    }
-    mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
-    prob <- model$rnn.exec$ref.outputs[["sm_output"]]
-    return (list(prob=prob, model=model))
-}
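The mx.lstm/mx.gru wrappers removed above are superseded by symbols built with rnn.graph, whose adjusted signature follows in the next diff. A hedged sketch of a call against the new API; all argument values are illustrative, and num.decode replaces the former num.label:

    library(mxnet)

    # Sequence classification symbol using the fused mx.symbol.RNN operator
    sym <- rnn.graph(num.rnn.layer = 1,
                     input.size = 10000,      # vocabulary size, used only with embedding
                     num.embed = 64,          # NULL skips the embedding layer
                     num.hidden = 50,
                     num.decode = 2,          # output units of the decoding layer
                     dropout = 0.2,
                     ignore_label = -1,
                     loss_output = "softmax", # switch keys in this commit: softmax, linear, logictic, MAE
                     config = "seq-to-one",
                     cell.type = "lstm",
                     masking = TRUE)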
diff --git a/R-package/R/rnn.graph.R b/R-package/R/rnn.graph.R
index 11e5ef5..2c099f0 100644
--- a/R-package/R/rnn.graph.R
+++ b/R-package/R/rnn.graph.R
@@ -5,32 +5,33 @@
 #' @param cell.type Type of RNN cell: either gru or lstm
 #' @param num.rnn.layer int, number of stacked layers
 #' @param num.hidden int, size of the state in each RNN layer
-#' @param num.embed int, dimension of the embedding vectors
-#' @param num.label int, number of categories in labels
-#' @param input.size int, number of levels in the data
+#' @param num.embed int, default = NULL - no embedding. Dimension of the embedding vectors
+#' @param num.decode int, number of output variables in the decoding layer
+#' @param input.size int, number of levels in the data - only used for embedding
 #' @param dropout
 #'
 #' @export
 rnn.graph <- function(num.rnn.layer,
-                      input.size,
-                      num.embed,
+                      input.size = NULL,
+                      num.embed = NULL,
                       num.hidden,
-                      num.label,
+                      num.decode,
                       dropout = 0,
                       ignore_label = -1,
+                      loss_output = NULL,
                       config,
                       cell.type,
                       masking = F,
                       output_last_state = F) {
   
   # define input arguments
-  label <- mx.symbol.Variable("label")
   data <- mx.symbol.Variable("data")
+  label <- mx.symbol.Variable("label")
   seq.mask <- mx.symbol.Variable("seq.mask")
-  embed.weight <- mx.symbol.Variable("embed.weight")
-  rnn.params.weight <- mx.symbol.Variable("rnn.params.weight")
+  if (!is.null(num.embed)) embed.weight <- mx.symbol.Variable("embed.weight")
+  
+  rnn.params.weight <- mx.symbol.Variable("rnn.params.weight")
   rnn.state <- mx.symbol.Variable("rnn.state")
   
   if (cell.type == "lstm") {
@@ -40,15 +41,17 @@ rnn.graph <- function(num.rnn.layer,
   cls.weight <- mx.symbol.Variable("cls.weight")
   cls.bias <- mx.symbol.Variable("cls.bias")
   
-  embed <- mx.symbol.Embedding(data=data, input_dim=input.size,
-                               weight=embed.weight, output_dim=num.embed, name="embed")
+  if (!is.null(num.embed)){
+    data <- mx.symbol.Embedding(data=data, input_dim=input.size,
+                                weight=embed.weight, output_dim=num.embed, name="embed")
+  }
   
   # RNN cells
   if (cell.type == "lstm") {
-    rnn <- mx.symbol.RNN(data=embed, state=rnn.state, state_cell = rnn.state.cell, parameters=rnn.params.weight, state.size=num.hidden, num.layers=num.rnn.layer, bidirectional=F, mode=cell.type, state.outputs=output_last_state, p=dropout, name=paste(cell.type, num.rnn.layer, "layer", sep="_"))
+    rnn <- mx.symbol.RNN(data=data, state=rnn.state, state_cell = rnn.state.cell, parameters=rnn.params.weight, state.size=num.hidden, num.layers=num.rnn.layer, bidirectional=F, mode=cell.type, state.outputs=output_last_state, p=dropout, name=paste(cell.type, num.rnn.layer, "layer", sep="_"))
   } else {
-    rnn <- mx.symbol.RNN(data=embed, state=rnn.state, parameters=rnn.params.weight, state.size=num.hidden, num.layers=num.rnn.layer, bidirectional=F, mode=cell.type, state.outputs=output_last_state, p=dropout, name=paste(cell.type, num.rnn.layer, "layer", sep="_"))
+    rnn <- mx.symbol.RNN(data=data, state=rnn.state, parameters=rnn.params.weight, state.size=num.hidden, num.layers=num.rnn.layer, bidirectional=F, mode=cell.type, state.outputs=output_last_state, p=dropout, name=paste(cell.type, num.rnn.layer, "layer", sep="_"))
   }
   
   # Decode
@@ -57,30 +60,44 @@ rnn.graph <- function(num.rnn.layer,
     if (masking) mask <- mx.symbol.SequenceLast(data=rnn[[1]], use.sequence.length = T, sequence_length = seq.mask, name = "mask") else
       mask <- mx.symbol.SequenceLast(data=rnn[[1]], use.sequence.length = F, name = "mask")
     
-    fc <- mx.symbol.FullyConnected(data=mask,
-                                   weight=cls.weight,
-                                   bias=cls.bias,
-                                   num.hidden=num.label,
-                                   name = "decode")
+    decode <- mx.symbol.FullyConnected(data=mask,
+                                       weight=cls.weight,
+                                       bias=cls.bias,
+                                       num.hidden=num.decode,
+                                       name = "decode")
     
-    loss <- mx.symbol.SoftmaxOutput(data=fc, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss")
+    if (!is.null(loss_output)) {
+      loss <- switch(loss_output,
+                     softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"),
+                     linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = "loss"),
+                     logictic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = "loss"),
+                     MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = "loss")
+      )
+    } else loss <- decode
     
   } else if (config=="one-to-one"){
    
    if (masking) mask <- mx.symbol.SequenceMask(data = rnn[[1]], use.sequence.length = T, sequence_length = seq.mask, value = 0, name = "mask") else
      mask <- mx.symbol.identity(data = rnn[[1]], name = "mask")
-
-    reshape = mx.symbol.reshape(mask, shape=c(num.hidden, -1))
+
+    mask = mx.symbol.reshape(mask, shape=c(num.hidden, -1))
     
     decode <- mx.symbol.FullyConnected(data=reshape,
                                        weight=cls.weight,
                                        bias=cls.bias,
-                                       num.hidden=num.label,
+                                       num.hidden=num.decode,
                                        name = "decode")
     
     label <- mx.symbol.reshape(data=label, shape=c(-1), name = "label_reshape")
-    loss <- mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss")
+    if (!is.null(loss_output)) {
+      loss <- switch(loss_output,
+                     softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"),
+                     linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = "loss"),
+                     logictic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = "loss"),
+                     MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = "loss")
+      )
+    } else loss <- decode
   }
   return(loss)
 }
@@ -176,21 +193,24 @@ gru.cell <- function(num.hidden, indata, prev.state, param, seqidx, layeridx, dr
 #' unroll representation of RNN running on non CUDA device - under development
 #'
 #' @export
-rnn.unroll <- function(num.rnn.layer,
-                       seq.len,
-                       input.size,
-                       num.embed,
-                       num.hidden,
-                       num.label,
-                       dropout,
-                       ignore_label,
-                       init.state=NULL,
-                       config,
-                       cell.type="lstm",
-                       masking = F,
-                       output_last_state=F) {
-  
-  embed.weight <- mx.symbol.Variable("embed.weight")
+rnn.graph.unroll <- function(num.rnn.layer,
+                             seq.len,
+                             input.size = NULL,
+                             num.embed = NULL,
+                             num.hidden,
+                             num.decode,
+                             dropout = 0,
+                             ignore_label = -1,
+                             loss_output = NULL,
+                             init.state = NULL,
+                             config,
+                             cell.type = "lstm",
+                             masking = F,
+                             output_last_state = F) {
+  
+  
+  if (!is.null(num.embed)) embed.weight <- mx.symbol.Variable("embed.weight")
+  
   cls.weight <- mx.symbol.Variable("cls.weight")
   cls.bias <- mx.symbol.Variable("cls.bias")
@@ -215,19 +235,22 @@ rnn.graph.unroll <- function(num.rnn.layer,
   })
   
   # embeding layer
-  label <- mx.symbol.Variable("label")
   data <- mx.symbol.Variable("data")
+  label <- mx.symbol.Variable("label")
+  seq.mask <- mx.symbol.Variable("seq.mask")
   
-  embed <- mx.symbol.Embedding(data = data, input_dim = input.size,
-                               weight=embed.weight, output_dim = num.embed, name = "embed")
+  if (!is.null(num.embed)) {
+    data <- mx.symbol.Embedding(data = data, input_dim = input.size,
+                                weight=embed.weight, output_dim = num.embed, name = "embed")
+  }
   
-  embed <- mx.symbol.split(data = embed, axis = 0, num.outputs = seq.len, squeeze_axis = T)
+  data <- mx.symbol.split(data = data, axis = 0, num.outputs = seq.len, squeeze_axis = T)
   
   last.hidden <- list()
   last.states <- list()
   
   for (seqidx in 1:seq.len) {
-    hidden <- embed[[seqidx]]
+    hidden <- data[[seqidx]]
     
     for (i in 1:num.rnn.layer) {
@@ -250,34 +273,57 @@ rnn.graph.unroll <- function(num.rnn.layer,
       last.states[[i]] <- next.state
     }
    
-    # Decoding
-    if (config=="one-to-one"){
-      last.hidden <- c(last.hidden, hidden)
-    }
+    # Aggregate outputs from each timestep
+    last.hidden <- c(last.hidden, hidden)
   }
   
+  # concat hidden units - concat seq.len blocks of dimension num.hidden x batch.size
+  concat <- mx.symbol.concat(data = last.hidden, num.args = seq.len, dim = 0, name = "concat")
+  concat <- mx.symbol.reshape(data = concat, shape = c(num.hidden, -1, seq.len), name = "rnn_reshape")
+  
   if (config=="seq-to-one"){
-    fc <- mx.symbol.FullyConnected(data = hidden,
-                                   weight = cls.weight,
-                                   bias = cls.bias,
-                                   num.hidden = num.label)
-    loss <- mx.symbol.SoftmaxOutput(data = fc, name="sm", label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label)
+    
+    if (masking) mask <- mx.symbol.SequenceLast(data=concat, use.sequence.length = T, sequence_length = seq.mask, name = "mask") else
+      mask <- mx.symbol.SequenceLast(data=concat, use.sequence.length = F, name = "mask")
+    
+    decode <- mx.symbol.FullyConnected(data = mask,
+                                       weight = cls.weight,
+                                       bias = cls.bias,
+                                       num.hidden = num.decode,
+                                       name = "decode")
+    
+    if (!is.null(loss_output)) {
+      loss <- switch(loss_output,
+                     softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"),
+                     linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = "loss"),
+                     logictic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = "loss"),
+                     MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = "loss")
+      )
+    } else loss <- decode
    
   } else if (config=="one-to-one"){
    
-    # concat hidden units - concat seq.len blocks of dimension num.hidden x batch.size
-    concat <- mx.symbol.concat(data = last.hidden, num.args = seq.len, dim = 0, name = "concat")
+    if (masking) mask <- mx.symbol.SequenceMask(data = concat, use.sequence.length = T, sequence_length = seq.mask, value = 0, name = "mask") else
+      mask <- mx.symbol.identity(data = concat, name = "mask")
+    
+    mask = mx.symbol.reshape(mask, shape=c(num.hidden, -1))
    
-    decode <- mx.symbol.FullyConnected(data = concat,
+    decode <- mx.symbol.FullyConnected(data = mask,
                                        weight = cls.weight,
                                        bias = cls.bias,
-                                       num.hidden = num.label,
+                                       num.hidden = num.decode,
                                        name = "decode")
    
     label <- mx.symbol.reshape(data = label, shape = -1, name = "label_reshape")
-    loss <- mx.symbol.SoftmaxOutput(data = decode, name="sm", label = label, use_ignore = !ignore_label == -1, ignore_label = ignore_label)
+    if (!is.null(loss_output)) {
+      loss <- switch(loss_output,
+                     softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"),
+                     linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = "loss"),
+                     logictic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = "loss"),
+                     MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = "loss")
+      )
+    } else loss <- decode
   }
   return(loss)
 }
diff --git a/R-package/R/viz.graph.R b/R-package/R/viz.graph.R
index aef90ad..6d13de0 100644
--- a/R-package/R/viz.graph.R
+++ b/R-package/R/viz.graph.R
@@ -123,11 +123,14 @@ graph.viz <- function(symbol, shape=NULL, direction="TD", type="graph", graph.wi
                          stringsAsFactors=F)
   
   edges_df$from<- id_dic[as.character(edges_df$from)]
-  nodes_df_new<- create_node_df(n = nrow(nodes_df), label=nodes_df$label, shape=nodes_df$shape, type="base", penwidth=2, color=nodes_df$color, style="filled", fillcolor=adjustcolor(nodes_df$color, alpha.f = 1))
-  edge_df_new<- create_edge_df(from = edges_df$from, to=edges_df$to, color="black")
+  nodes_df_new<- create_node_df(n = nrow(nodes_df), label=nodes_df$label, shape=nodes_df$shape, type="base", penwidth=2, color=nodes_df$color, style="filled",
+                                fillcolor=adjustcolor(nodes_df$color, alpha.f = 1), fontcolor = "black")
+  edge_df_new<- create_edge_df(from = edges_df$from, to=edges_df$to, color="black", fontcolor = "black")
   
   if (!is.null(shape)){
-    edges_labels_raw<- symbol$get.internals()$infer.shape(list(data=shape))$out.shapes
+    if (is.list(shape)) {
+      edges_labels_raw<- symbol$get.internals()$infer.shape(shape)$out.shapes
+    } else edges_labels_raw<- symbol$get.internals()$infer.shape(list(data=shape))$out.shapes
     if (!is.null(edges_labels_raw)){
       edge_label_str <- function(x) paste0(x, collapse="X")
       edges_labels_raw<- sapply(edges_labels_raw, edge_label_str)

-- 
To stop receiving notification emails like this one, please contact
['"comm...@mxnet.apache.org" <comm...@mxnet.apache.org>'].