hetong007 closed pull request #9803: R Metrics
URL: https://github.com/apache/incubator-mxnet/pull/9803
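In brief, the patch below reworks the R metric functions to consume NDArrays directly (mx.nd.* operators instead of as.array() round-trips), adds logloss and logistic-accuracy metrics plus a masked Perplexity, threads a new metric_cpu flag through mx.model.train() / mx.model.FeedForward.create() and the bucketing trainer, and adds an optional gc_freq to mx.model.buckets().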
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:

diff --git a/R-package/R/metric.R b/R-package/R/metric.R
index f8d9c33a726..9e1913a0877 100644
--- a/R-package/R/metric.R
+++ b/R-package/R/metric.R
@@ -6,7 +6,7 @@ mx.metric.custom <- function(name, feval) {
     c(0, 0)
   }
   update <- function(label, pred, state) {
-    m <- feval(as.array(label), as.array(pred))
+    m <- feval(label, pred)
     state <- c(state[[1]] + 1, state[[2]] + m)
     return(state)
   }
@@ -22,69 +22,90 @@ mx.metric.custom <- function(name, feval) {
 #'
 #' @export
 mx.metric.accuracy <- mx.metric.custom("accuracy", function(label, pred) {
-  ypred = max.col(t(as.array(pred)), tie="first")
-  return(sum((as.array(label) + 1) == ypred) / length(label))
+  pred <- mx.nd.argmax(data = pred, axis = 1, keepdims = F)
+  res <- mx.nd.mean(label == pred)
+  return(as.array(res))
 })

-#' Helper function for top-k accuracy
-is.num.in.vect <- function(vect, num){
-  resp <- any(is.element(vect, num))
-  return(resp)
-}
-
 #' Top-k accuracy metric for classification
 #'
 #' @export
 mx.metric.top_k_accuracy <- mx.metric.custom("top_k_accuracy", function(label, pred, top_k = 5) {
-  if(top_k == 1){
-    return(mx.metric.accuracy(label,pred))
-  } else{
-    ypred <- apply(pred,2,function(x) order(x, decreasing=TRUE)[seq_len(top_k)])
-    ans <- apply(ypred, 2, is.num.in.vect, num = as.array(label + 1))
-    acc <- sum(ans)/length(label)
-    return(acc)
-  }
+  label <- mx.nd.reshape(data = label, shape = c(1,0))
+  pred <- mx.nd.topk(data = pred, axis = 1, k = top_k, ret_typ = "indices")
+  pred <- mx.nd.broadcast.equal(lhs = pred, rhs = label)
+  res <- mx.nd.mean(mx.nd.sum(data = pred, axis = 1, keepdims = F))
+  return(as.array(res))
 })

 #' MSE (Mean Squared Error) metric for regression
 #'
 #' @export
 mx.metric.mse <- mx.metric.custom("mse", function(label, pred) {
-  res <- mean((label-pred)^2)
-  return(res)
+  pred <- mx.nd.reshape(pred, shape = 0)
+  res <- mx.nd.mean(mx.nd.square(label-pred))
+  return(as.array(res))
 })
-
+
 #' RMSE (Root Mean Squared Error) metric for regression
 #'
 #' @export
 mx.metric.rmse <- mx.metric.custom("rmse", function(label, pred) {
-  res <- sqrt(mean((label-pred)^2))
-  return(res)
+  pred <- mx.nd.reshape(pred, shape = 0)
+  res <- mx.nd.sqrt(mx.nd.mean(mx.nd.square(label-pred)))
+  return(as.array(res))
 })

 #' MAE (Mean Absolute Error) metric for regression
 #'
 #' @export
 mx.metric.mae <- mx.metric.custom("mae", function(label, pred) {
-  res <- mean(abs(label-pred))
-  return(res)
+  pred <- mx.nd.reshape(pred, shape = 0)
+  res <- mx.nd.mean(mx.nd.abs(label-pred))
+  return(as.array(res))
 })

 #' RMSLE (Root Mean Squared Logarithmic Error) metric for regression
 #'
 #' @export
 mx.metric.rmsle <- mx.metric.custom("rmsle", function(label, pred) {
-  res <- sqrt(mean((log(pred + 1) - log(label + 1))^2))
-  return(res)
+  pred <- mx.nd.reshape(pred, shape = 0)
+  res <- mx.nd.sqrt(mx.nd.mean(mx.nd.square(mx.nd.log1p(pred) - mx.nd.log1p(label))))
+  return(as.array(res))
 })

 #' Perplexity metric for language model
 #'
 #' @export
-mx.metric.Perplexity <- mx.metric.custom("Perplexity", function(label, pred) {
-  label_probs <- as.array(mx.nd.choose.element.0index(pred, label))
-  batch <- length(label_probs)
-  NLL <- -sum(log(pmax(1e-15, as.array(label_probs)))) / batch
-  Perplexity <- exp(NLL)
-  return(Perplexity)
+mx.metric.Perplexity <- mx.metric.custom("Perplexity", function(label, pred, mask_element = -1) {
+
+  label <- mx.nd.reshape(label, shape = -1)
+  pred_probs <- mx.nd.pick(data = pred, index = label, axis = 1)
+
+  mask <- label != mask_element
+  mask_length <- mx.nd.sum(mask)
+
+  NLL <- -mx.nd.sum(mx.nd.log(pred_probs) * mask) / mask_length
+  res <- mx.nd.exp(NLL)
+  return(as.array(res))
 })
+
+#' LogLoss metric for logistic regression
+#'
+#' @export
+mx.metric.logloss <- mx.metric.custom("logloss", function(label, pred) {
+  pred <- mx.nd.reshape(pred, shape = 0)
+  pred <- mx.nd.clip(pred, a_min = 1e-15, a_max = 1-1e-15)
+  res <- -mx.nd.mean(label * mx.nd.log(pred) + (1-label) * mx.nd.log(1-pred))
+  return(as.array(res))
+})
+
+#' Accuracy metric for logistic regression
+#'
+#' @export
+mx.metric.logistic_acc <- mx.metric.custom("accuracy", function(label, pred) {
+  pred <- mx.nd.reshape(pred, shape = 0) > 0.5
+  res <- mx.nd.mean(label == pred)
+  return(as.array(res))
+})
+
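For orientation, here is a minimal sketch of how the reworked metrics behave after this change (the toy values below are invented for illustration, not taken from the PR). Since feval no longer converts its inputs, label and pred arrive as NDArrays, custom metrics should use mx.nd.* operators, and the final scalar comes back via as.array():

    library(mxnet)

    # toy values, purely illustrative
    label <- mx.nd.array(c(1, 2, 3, 4))
    pred  <- mx.nd.array(c(1.1, 1.9, 3.2, 3.8))

    metric <- mx.metric.mse
    state  <- metric$init()   # c(0, 0): update count and running sum
    state  <- metric$update(label = label, pred = pred, state = state)
    metric$get(state)         # list(name = "mse", value = <mean over updates>)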
diff --git a/R-package/R/model.R b/R-package/R/model.R
index 01b5ed72835..b461f7973f6 100644
--- a/R-package/R/model.R
+++ b/R-package/R/model.R
@@ -112,13 +112,14 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
                            begin.round, end.round, optimizer,
                            train.data, eval.data, metric,
                            epoch.end.callback, batch.end.callback,
-                           kvstore, fixed.param = NULL, verbose = TRUE) {
+                           kvstore, fixed.param, verbose,
+                           metric_cpu) {
   ndevice <- length(ctx)
   if(verbose) message("Start training with ", ndevice, " devices")
   # create the executors
   input_slice <- mx.model.slice.shape(input.shape, ndevice)
   output_slice <- mx.model.slice.shape(output.shape, ndevice)
-
+
   arg_names <- arguments(symbol)
   output.names <- names(output.shape)
   #label_name <- arg_names[endsWith(arg_names, "label")]
@@ -126,7 +127,7 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
     arg_lst <- list(symbol = symbol, ctx = ctx[[i]], grad.req = "write")
     arg_lst <- append(arg_lst, input_slice[[i]]$shape)
     arg_lst <- append(arg_lst, output_slice[[i]]$shape)
-    arg_lst[["fixed.param"]] = fixed.param
+    arg_lst[["fixed.param"]] = unique(c(fixed.param, names(input.shape), names(output.shape)))
     do.call(mx.simple.bind, arg_lst)
   })
   # set the parameters into executors
@@ -153,8 +154,10 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
     kvstore$init(params.index, train.execs[[1]]$ref.arg.arrays[params.index])
   }
   # Get the input names
-
+
   for (iteration in begin.round:end.round) {
+    # reset training data
+    train.data$reset()
     nbatch <- 0
     if (!is.null(metric)) {
       train.metric <- metric$init()
@@ -175,13 +178,22 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
       }
       mx.exec.update.arg.arrays(train.execs[[i]], s, match.name=TRUE)
     }
+
+    # forward pass
     for (texec in train.execs) {
       mx.exec.forward(texec, is.train=TRUE)
     }
-    # copy outputs to CPU
-    out.preds <- lapply(train.execs, function(texec) {
-      mx.nd.copyto(texec$ref.outputs[[1]], mx.cpu())
-    })
+
+    # copy of preds and labels for metric
+    if (!is.null(metric)) {
+      preds <- lapply(train.execs, function(texec) {texec$ref.outputs[[1]]})
+      labels <- lapply(train.execs, function(texec) {texec$ref.arg.arrays[[output.names[length(output.names)]]]})
+      if (metric_cpu) {
+        preds <- lapply(seq_along(train.execs), function(i) {mx.nd.copyto(preds[[i]], mx.cpu())})
+        labels <- lapply(seq_along(train.execs), function(i) {mx.nd.copyto(labels[[i]], mx.cpu())})
+      }
+    }
+
+    # backward pass
     for (texec in train.execs) {
       mx.exec.backward(texec)
     }
@@ -215,7 +227,9 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
       # Update the evaluation metrics
       if (!is.null(metric)) {
         for (i in seq_len(ndevice)) {
-          train.metric <- metric$update(slices[[i]][[length(slices[[i]])]], out.preds[[i]], train.metric)
+          train.metric <- metric$update(label = labels[[i]],
+                                        pred = preds[[i]],
+                                        state = train.metric)
         }
       }
       nbatch <- nbatch + 1
@@ -223,13 +237,14 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
         batch.end.callback(iteration, nbatch, environment())
       }
     }
-    # reset training data
-    train.data$reset()
+
     if (!is.null(metric)) {
       result <- metric$get(train.metric)
       if(verbose) message("[", iteration, "] Train-", result$name, "=", result$value)
     }
     if (!is.null(eval.data)) {
+      # reset eval data
+      eval.data$reset()
       if (!is.null(metric)) {
         eval.metric <- metric$init()
       }
@@ -250,16 +265,22 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
         for (texec in train.execs) {
           mx.exec.forward(texec, is.train=FALSE)
         }
-        out.preds <- lapply(train.execs, function(texec) {
-          mx.nd.copyto(texec$ref.outputs[[1]], mx.cpu())
-        })
+
+        # copy of preds and labels for metric and update metric
         if (!is.null(metric)) {
+          preds <- lapply(train.execs, function(texec) {texec$ref.outputs[[1]]})
+          labels <- lapply(train.execs, function(texec) {texec$ref.arg.arrays[[output.names[length(output.names)]]]})
+          if (metric_cpu) {
+            preds <- lapply(seq_along(train.execs), function(i) {mx.nd.copyto(preds[[i]], mx.cpu())})
+            labels <- lapply(seq_along(train.execs), function(i) {mx.nd.copyto(labels[[i]], mx.cpu())})
+          }
           for (i in seq_len(ndevice)) {
-            eval.metric <- metric$update(slices[[i]][[length(slices[[i]])]] , out.preds[[i]], eval.metric)
+            eval.metric <- metric$update(label = labels[[i]],
+                                         pred = preds[[i]],
+                                         state = eval.metric)
           }
         }
       }
-      eval.data$reset()
       if (!is.null(metric)) {
         result <- metric$get(eval.metric)
         if(verbose) message("[", iteration, "] Validation-", result$name, "=", result$value)
@@ -269,12 +290,12 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
     }
     # get the model out
     model <- mx.model.extract.model(symbol, train.execs)
-
+
     epoch_continue <- TRUE
     if (!is.null(epoch.end.callback)) {
       epoch_continue <- epoch.end.callback(iteration, 0, environment(), verbose = verbose)
    }
-
+
    if (!epoch_continue) {
      break
    }
@@ -291,11 +312,11 @@ mx.model.train <- function(symbol, ctx, input.shape, output.shape,
 #' @export
 mx.model.init.params <- function(symbol, input.shape, output.shape, initializer, ctx) {
   if (!is.MXSymbol(symbol)) stop("symbol needs to be MXSymbol")
-
+
   arg_lst <- list(symbol = symbol)
   arg_lst <- append(arg_lst, input.shape)
   arg_lst <- append(arg_lst, output.shape)
-
+
   slist <- do.call(mx.symbol.infer.shape, arg_lst)
   if (is.null(slist)) stop("Not enough information to get shapes")
   arg.params <- mx.init.create(initializer, slist$arg.shapes, ctx, skip.unknown=TRUE)
@@ -430,93 +451,95 @@ mx.model.select.layout.predict <- function(X, model) {
 #' @export
 mx.model.FeedForward.create <-
-function(symbol, X, y=NULL, ctx=NULL, begin.round=1,
-         num.round=10, optimizer="sgd",
-         initializer=mx.init.uniform(0.01),
-         eval.data=NULL, eval.metric=NULL,
-         epoch.end.callback=NULL, batch.end.callback=NULL,
-         array.batch.size=128, array.layout="auto",
-         kvstore = "local", verbose = TRUE,
-         arg.params = NULL, aux.params = NULL,
-         input.names=NULL, output.names = NULL,
-         fixed.param = NULL, allow.extra.params = FALSE,
-         ...) {
-  if (is.array(X) || is.matrix(X)) {
-    if (array.layout == "auto") {
-      array.layout <- mx.model.select.layout.train(X, y)
+  function(symbol, X, y=NULL, ctx=NULL, begin.round=1,
+           num.round=10, optimizer="sgd",
+           initializer=mx.init.uniform(0.01),
+           eval.data=NULL, eval.metric=NULL,
+           epoch.end.callback=NULL, batch.end.callback=NULL,
+           array.batch.size=128, array.layout="auto",
+           kvstore = "local", verbose = TRUE,
+           arg.params = NULL, aux.params = NULL,
+           input.names=NULL, output.names = NULL,
+           fixed.param = NULL, allow.extra.params = FALSE,
+           metric_cpu = TRUE,
+           ...) {
+    if (is.array(X) || is.matrix(X)) {
+      if (array.layout == "auto") {
+        array.layout <- mx.model.select.layout.train(X, y)
+      }
+      if (array.layout == "rowmajor") {
+        X <- t(X)
+      }
     }
-    if (array.layout == "rowmajor") {
-      X <- t(X)
+    X <- mx.model.init.iter(X, y, batch.size=array.batch.size, is.train=TRUE)
+    if (!X$iter.next()) {
+      X$reset()
+      if (!X$iter.next()) stop("Empty input")
     }
-  }
-  X <- mx.model.init.iter(X, y, batch.size=array.batch.size, is.train=TRUE)
-  if (!X$iter.next()) {
-    X$reset()
-    if (!X$iter.next()) stop("Empty input")
-  }
-  if (is.null(input.names)) {
-    input.names <- "data"
-  }
-  input.shape <- sapply(input.names, function(n){dim(X$value()[[n]])}, simplify = FALSE)
-  if (is.null(output.names)) {
-    arg_names <- arguments(symbol)
-    output.names <- arg_names[endsWith(arg_names, "label")]
-    output.shape <- list()
-    output.shape[[output.names]] <- dim((X$value())$label)
-  } else {
-    output.shape <- sapply(output.names, function(n){dim(X$value()[[n]])}, simplify = FALSE)
-  }
-  params <- mx.model.init.params(symbol, input.shape, output.shape, initializer, mx.cpu())
-  if (!is.null(arg.params)) params$arg.params <- arg.params
-  if (!is.null(aux.params)) params$aux.params <- aux.params
-  if (allow.extra.params) {
-    params$arg.params[!names(params$arg.params) %in% arguments(symbol)] <- NULL
-  }
-  if (is.null(ctx)) ctx <- mx.ctx.default()
-  if (is.mx.context(ctx)) {
-    ctx <- list(ctx)
-  }
-  if (!is.list(ctx)) stop("ctx must be mx.context or list of mx.context")
-  if (is.character(optimizer)) {
-    if (is.numeric(input.shape)) {
-      ndim <- length(input.shape)
-      batchsize = input.shape[[ndim]]
+    if (is.null(input.names)) {
+      input.names <- "data"
+    }
+    input.shape <- sapply(input.names, function(n){dim(X$value()[[n]])}, simplify = FALSE)
+    if (is.null(output.names)) {
+      arg_names <- arguments(symbol)
+      output.names <- arg_names[endsWith(arg_names, "label")]
+      output.shape <- list()
+      output.shape[[output.names]] <- dim((X$value())$label)
    } else {
-      ndim <- length(input.shape[[1]])
-      batchsize = input.shape[[1]][[ndim]]
+      output.shape <- sapply(output.names, function(n){dim(X$value()[[n]])}, simplify = FALSE)
    }
-    optimizer <- mx.opt.create(optimizer, rescale.grad=(1/batchsize), ...)
-  }
-  if (!is.null(eval.data) && !is.list(eval.data) && !is.mx.dataiter(eval.data)) {
-    stop("The validation set should be either a mx.io.DataIter or a R list")
-  }
-  if (is.list(eval.data)) {
-    if (is.null(eval.data$data) || is.null(eval.data$label)){
-      stop("Please provide the validation set as list(data=R.array, label=R.array)")
+    params <- mx.model.init.params(symbol, input.shape, output.shape, initializer, mx.cpu())
+    if (!is.null(arg.params)) params$arg.params <- arg.params
+    if (!is.null(aux.params)) params$aux.params <- aux.params
+    if (allow.extra.params) {
+      params$arg.params[!names(params$arg.params) %in% arguments(symbol)] <- NULL
    }
-    if (is.array(eval.data$data) || is.matrix(eval.data$data)) {
-      if (array.layout == "auto") {
-        array.layout <- mx.model.select.layout.train(eval.data$data, eval.data$label)
+    if (is.null(ctx)) ctx <- mx.ctx.default()
+    if (is.mx.context(ctx)) {
+      ctx <- list(ctx)
+    }
+    if (!is.list(ctx)) stop("ctx must be mx.context or list of mx.context")
+    if (is.character(optimizer)) {
+      if (is.numeric(input.shape)) {
+        ndim <- length(input.shape)
+        batchsize = input.shape[[ndim]]
+      } else {
+        ndim <- length(input.shape[[1]])
+        batchsize = input.shape[[1]][[ndim]]
      }
-      if (array.layout == "rowmajor") {
-        eval.data$data <- t(eval.data$data)
+      optimizer <- mx.opt.create(optimizer, rescale.grad=(1/batchsize), ...)
+    }
+    if (!is.null(eval.data) && !is.list(eval.data) && !is.mx.dataiter(eval.data)) {
+      stop("The validation set should be either a mx.io.DataIter or a R list")
+    }
+    if (is.list(eval.data)) {
+      if (is.null(eval.data$data) || is.null(eval.data$label)){
+        stop("Please provide the validation set as list(data=R.array, label=R.array)")
+      }
+      if (is.array(eval.data$data) || is.matrix(eval.data$data)) {
+        if (array.layout == "auto") {
+          array.layout <- mx.model.select.layout.train(eval.data$data, eval.data$label)
+        }
+        if (array.layout == "rowmajor") {
+          eval.data$data <- t(eval.data$data)
+        }
      }
+      eval.data <- mx.model.init.iter(eval.data$data, eval.data$label, batch.size=array.batch.size, is.train = TRUE)
    }
-    eval.data <- mx.model.init.iter(eval.data$data, eval.data$label, batch.size=array.batch.size, is.train = TRUE)
+    kvstore <- mx.model.create.kvstore(kvstore, params$arg.params, length(ctx), verbose=verbose)
+    model <- mx.model.train(symbol, ctx, input.shape, output.shape,
+                            params$arg.params, params$aux.params,
+                            begin.round, num.round, optimizer=optimizer,
+                            train.data=X, eval.data=eval.data,
+                            metric=eval.metric,
+                            epoch.end.callback=epoch.end.callback,
+                            batch.end.callback=batch.end.callback,
+                            kvstore=kvstore,
+                            fixed.param = fixed.param,
+                            verbose=verbose,
+                            metric_cpu = metric_cpu)
+    return (model)
  }
-  kvstore <- mx.model.create.kvstore(kvstore, params$arg.params, length(ctx), verbose=verbose)
-  model <- mx.model.train(symbol, ctx, input.shape, output.shape,
-                          params$arg.params, params$aux.params,
-                          begin.round, num.round, optimizer=optimizer,
-                          train.data=X, eval.data=eval.data,
-                          metric=eval.metric,
-                          epoch.end.callback=epoch.end.callback,
-                          batch.end.callback=batch.end.callback,
-                          kvstore=kvstore,
-                          fixed.param = fixed.param,
-                          verbose=verbose)
-  return (model)
-}

 #' Predict the outputs given a model and dataset.
 #'
@@ -552,7 +575,7 @@ predict.MXFeedForwardModel <- function(model, X, ctx = NULL, array.batch.size =
   if (!X$iter.next()) stop("Cannot predict on empty iterator")
   dlist = X$value()
   arg_lst <- list(symbol = model$symbol, ctx = ctx, data = dim(dlist$data), grad.req="null")
-
+
   pexec <- do.call(mx.simple.bind, arg_lst)
   if (allow.extra.params) {
     model$arg.params[!names(model$arg.params) %in% arguments(model$symbol)] <- NULL
@@ -588,7 +611,7 @@ mx.model.load <- function(prefix, iteration) {

   arg.index <- startsWith(nms, "arg:")
   aux.index <- startsWith(nms, "aux:")
-
+
   if (any(arg.index)) {
     arg.params <- save.dict[arg.index]
     names(arg.params) <- substr(nms[arg.index], 5, nchar(nms[arg.index]))
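The user-facing effect of the model.R changes above: mx.model.FeedForward.create() now accepts metric_cpu (default TRUE), which controls whether predictions and labels are copied to the CPU before the metric update; with an NDArray-based metric and a GPU context, FALSE keeps the metric computation on the device. A minimal sketch with invented toy data (not from this PR):

    # toy regression problem, purely illustrative
    train.x <- matrix(runif(200), nrow = 10)  # 10 samples x 20 features
    train.y <- runif(10)

    data <- mx.symbol.Variable("data")
    fc1  <- mx.symbol.FullyConnected(data, num_hidden = 1)
    lro  <- mx.symbol.LinearRegressionOutput(fc1)

    model <- mx.model.FeedForward.create(lro, X = train.x, y = train.y,
                                         ctx = mx.cpu(), num.round = 5,
                                         array.batch.size = 5,
                                         array.layout = "rowmajor",
                                         eval.metric = mx.metric.rmse,
                                         metric_cpu = TRUE)  # new argument in this PR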
diff --git a/R-package/R/model.rnn.R b/R-package/R/model.rnn.R
index 3bf1d96dceb..f328d1ba6b7 100644
--- a/R-package/R/model.rnn.R
+++ b/R-package/R/model.rnn.R
@@ -2,8 +2,9 @@
 mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
                                    dlist, arg.params, aux.params,
                                    grad.req, arg.update.idx,
-                                   begin.round, end.round, optimizer, metric,
-                                   epoch.end.callback, batch.end.callback, kvstore, verbose = TRUE) {
+                                   begin.round, end.round, optimizer, metric, metric_cpu,
+                                   epoch.end.callback, batch.end.callback, kvstore, verbose,
+                                   gc_freq) {

   ndevice <- length(ctx)
   if (verbose)
@@ -21,7 +22,7 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
   train.execs <- lapply(seq_len(ndevice), function(i) {
     s <- slices[[i]]
     mx.symbol.bind(symbol = sym_ini, arg.arrays = c(s, arg.params)[arg.update.idx],
-      aux.arrays = aux.params, ctx = ctx[[i]], grad.req = grad.req)
+                   aux.arrays = aux.params, ctx = ctx[[i]], grad.req = grad.req)
   })

   # KVStore related stuffs
@@ -48,6 +49,7 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
   # train over specified number of epochs
   for (iteration in begin.round:end.round) {
     nbatch <- 0
+    gc()
     if (!is.null(metric)) {
       train.metric <- metric$init()
     }
@@ -77,14 +79,22 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
         }
       }

+      # forward pass
       for (texec in train.execs) {
         mx.exec.forward(texec, is.train = TRUE)
       }
-      out.preds <- lapply(train.execs, function(texec) {
-        mx.nd.copyto(texec$ref.outputs[[1]], mx.cpu())
-      })
+      # copy of preds and labels for metric
+      if (!is.null(metric)) {
+        preds <- lapply(train.execs, function(texec) {texec$ref.outputs[[1]]})
+        labels <- lapply(train.execs, function(texec) {texec$ref.arg.arrays[[input.names[length(input.names)]]]})
+        if (metric_cpu) {
+          preds <- lapply(seq_along(train.execs), function(i) {mx.nd.copyto(preds[[i]], mx.cpu())})
+          labels <- lapply(seq_along(train.execs), function(i) {mx.nd.copyto(labels[[i]], mx.cpu())})
+        }
+      }
+      # backward pass
       for (texec in train.execs) {
         mx.exec.backward(texec)
       }
@@ -118,12 +128,16 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
       # Update the evaluation metrics
       if (!is.null(metric)) {
         for (i in seq_len(ndevice)) {
-          train.metric <- metric$update(label = slices[[i]][[length(slices[[i]])]],
-            pred = out.preds[[i]], state = train.metric)
+          train.metric <- metric$update(label = labels[[i]],
+                                        pred = preds[[i]],
+                                        state = train.metric)
         }
       }
       nbatch <- nbatch + 1
+      if (!is.null(gc_freq)) {
+        if (nbatch %% gc_freq == 0) gc()
+      }

       if (!is.null(batch.end.callback)) {
         batch.end.callback(iteration, nbatch, environment())
@@ -156,8 +170,8 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
           train.execs <- lapply(seq_len(ndevice), function(i) {
             s <- slices[[i]]
             mx.symbol.bind(symbol = symbol[[names(eval.data$bucketID)]],
-              arg.arrays = c(s, train.execs[[i]]$arg.arrays[arg.params.names])[arg.update.idx],
-              aux.arrays = train.execs[[i]]$aux.arrays, ctx = ctx[[i]], grad.req = grad.req)
+                           arg.arrays = c(s, train.execs[[i]]$arg.arrays[arg.params.names])[arg.update.idx],
+                           aux.arrays = train.execs[[i]]$aux.arrays, ctx = ctx[[i]], grad.req = grad.req)
           })
         } else {
           for (i in seq_len(ndevice)) {
@@ -166,19 +180,23 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
           }
         }

+        # forward pass
         for (texec in train.execs) {
          mx.exec.forward(texec, is.train = FALSE)
        }
-        # copy outputs to CPU
-        out.preds <- lapply(train.execs, function(texec) {
-          mx.nd.copyto(texec$ref.outputs[[1]], mx.cpu())
-        })
-
+        # copy of preds and labels for metric and update metric
        if (!is.null(metric)) {
+          preds <- lapply(train.execs, function(texec) {texec$ref.outputs[[1]]})
+          labels <- lapply(train.execs, function(texec) {texec$ref.arg.arrays[[input.names[length(input.names)]]]})
+          if (metric_cpu) {
+            preds <- lapply(seq_along(train.execs), function(i) {mx.nd.copyto(preds[[i]], mx.cpu())})
+            labels <- lapply(seq_along(train.execs), function(i) {mx.nd.copyto(labels[[i]], mx.cpu())})
+          }
          for (i in seq_len(ndevice)) {
-            eval.metric <- metric$update(slices[[i]][[length(slices[[i]])]],
-              out.preds[[i]], eval.metric)
+            eval.metric <- metric$update(label = labels[[i]],
+                                         pred = preds[[i]],
+                                         state = eval.metric)
          }
        }
      }
@@ -187,7 +205,7 @@ mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data,
        result <- metric$get(eval.metric)
        if (verbose) {
          message("[", iteration, "] Validation-", result$name, "=",
-            result$value)
+                  result$value)
        }
      }
    } else {
@@ -232,7 +250,7 @@ mx.model.buckets <- function(symbol, train.data, eval.data = NULL, metric = NULL
                             num.round = 1, begin.round = 1,
                             initializer = mx.init.uniform(0.01), optimizer = "sgd",
                             ctx = NULL, batch.end.callback = NULL, epoch.end.callback = NULL,
-                            kvstore = "local", verbose = TRUE) {
+                            kvstore = "local", verbose = TRUE, metric_cpu = TRUE, gc_freq = NULL) {

   if (!train.data$iter.next()) {
     train.data$reset()
@@ -324,7 +342,7 @@ mx.model.buckets <- function(symbol, train.data, eval.data = NULL, metric = NULL
   # kvstore initialization
   kvstore <- mx.model.create.kvstore(kvstore, params$arg.params, length(ctx),
-    verbose = verbose)
+                                     verbose = verbose)

   ### Execute training
   model <- mx.model.train.buckets(symbol = symbol, ctx = ctx, train.data = train.data, eval.data = eval.data,
@@ -333,7 +351,7 @@ mx.model.buckets <- function(symbol, train.data, eval.data = NULL, metric = NULL
                                   optimizer = optimizer, metric = metric,
                                   begin.round = begin.round, end.round = num.round,
                                   batch.end.callback = batch.end.callback, epoch.end.callback = epoch.end.callback,
-                                  kvstore = kvstore, verbose = verbose)
+                                  kvstore = kvstore, verbose = verbose, metric_cpu = metric_cpu, gc_freq = gc_freq)
   return(model)
 }
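The bucketing trainer mirrors the same metric_cpu plumbing and additionally gains gc_freq, which runs R's gc() every gc_freq batches so NDArray handles released by finished batches are actually freed during long RNN epochs. A sketch of the changed call only — rnn.sym, train.iter and eval.iter are hypothetical stand-ins, not defined in this PR:

    # rnn.sym / train.iter / eval.iter are assumed placeholders for a
    # bucketed RNN symbol and its bucketed data iterators
    model <- mx.model.buckets(symbol = rnn.sym,
                              train.data = train.iter, eval.data = eval.iter,
                              metric = mx.metric.Perplexity,
                              num.round = 5, ctx = mx.cpu(),
                              metric_cpu = TRUE,  # as in FeedForward.create
                              gc_freq = 100)      # call gc() every 100 batches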
diff --git a/R-package/R/rnn.infer.R b/R-package/R/rnn.infer.R
index 588056f2eb8..57201e721c0 100644
--- a/R-package/R/rnn.infer.R
+++ b/R-package/R/rnn.infer.R
@@ -1,7 +1,7 @@
 #' Inference of RNN model
 #'
-#' @param infer.data Data iterator created by mx.io.bucket.iter
+#' @param infer.data DataIter
 #' @param model Model used for inference
 #' @param ctx
 #'
@@ -37,7 +37,7 @@ mx.infer.rnn <- function(infer.data, model, ctx = mx.cpu()) {
   arguments.ini <- lapply(shapes$arg.shapes, function(shape) {
     mx.nd.zeros(shape = shape, ctx = mx.cpu())
   })
-
+
   arg.params <- model$arg.params
   arg.params.names <- names(arg.params)
   aux.params <- model$aux.params
@@ -59,7 +59,7 @@ mx.infer.rnn <- function(infer.data, model, ctx = mx.cpu()) {
   arg_update_idx <- match(arguments, update_names)
   execs <- mx.symbol.bind(symbol = symbol, arg.arrays = c(dlist, arg.params.fix, arg.params)[arg_update_idx],
-    aux.arrays = aux.params, ctx = ctx[[1]], grad.req = grad.req)
+                          aux.arrays = aux.params, ctx = ctx[[1]], grad.req = grad.req)

   # Initial input shapes - need to be adapted for multi-devices - divide highest
   # dimension by device nb
@@ -69,10 +69,10 @@ mx.infer.rnn <- function(infer.data, model, ctx = mx.cpu()) {
   while (infer.data$iter.next()) {
     # Get input data slice
-    dlist <- infer.data$value() #[input.names]
+    dlist <- infer.data$value()[input.names]
     execs <- mx.symbol.bind(symbol = symbol, arg.arrays = c(dlist, execs$arg.arrays[arg.params.fix.names],
       execs$arg.arrays[arg.params.names])[arg_update_idx],
-      aux.arrays = execs$aux.arrays, ctx = ctx[[1]], grad.req = grad.req)
+                            aux.arrays = execs$aux.arrays, ctx = ctx[[1]], grad.req = grad.req)

     mx.exec.forward(execs, is.train = FALSE)
@@ -225,7 +225,7 @@ mx.infer.rnn.one.unroll <- function(infer.data,
   # init_state_shapes
   init_states_names <- arguments[startsWith(arguments, "init_")]
-  init_states_shapes = lapply(init_states_names, function(x) c(num_hidden, tail(input.shape[[1]], 1)))
+  init_states_shapes <- lapply(init_states_names, function(x) c(num_hidden, tail(input.shape[[1]], 1)))
   names(init_states_shapes) <- init_states_names

   shapes <- symbol$infer.shape(c(input.shape, init_states_shapes))
diff --git a/R-package/tests/testthat/test_model.R b/R-package/tests/testthat/test_model.R
index 13e54ff9ce4..6167ed66c41 100644
--- a/R-package/tests/testthat/test_model.R
+++ b/R-package/tests/testthat/test_model.R
@@ -80,8 +80,9 @@ test_that("Regression", {
   lro <- mx.symbol.LinearRegressionOutput(fc1)

   demo.metric.mae <- mx.metric.custom("mae", function(label, pred) {
-    res <- mean(abs(label - pred))
-    return(res)
+    pred <- mx.nd.reshape(pred, shape = 0)
+    res <- mx.nd.mean(mx.nd.abs(label-pred))
+    return(as.array(res))
   })
   mx.set.seed(0)
   model <- mx.model.FeedForward.create(lro, X = train.x, y = train.y,
@@ -291,6 +292,8 @@ test_that("Captcha", {
   captcha_net <- mx.symbol.SoftmaxOutput(data = fc2, label = label, name = "softmax")

   mx.metric.acc2 <- mx.metric.custom("accuracy", function(label, pred) {
+    label = as.array(label)
+    pred = as.array(pred)
     ypred <- max.col(t(pred)) - 1
     ypred <- matrix(ypred, nrow = nrow(label), ncol = ncol(label), byrow = TRUE)
     return(sum(colSums(label == ypred) == 4)/ncol(label))

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services