Hi Max,

Here's a bit more information regarding the 'memory not mapped' errors which 
occur in caret.

1. The segfault only occurs when knitting a Markdown file in RStudio. When the 
code is run 'normally' in R, everything's fine.
2. The error is very hard to replicate! It only occurs when the following block 
(running train on C5.0) is run after several other calls to train; running the 
block alone in a Markdown file runs fine:
require(repmis)  # For downloading from https
df <- source_data('https://dl.dropboxusercontent.com/u/47973221/data.csv')
c5.ms <- train(df[,-1],df[,1], method='C5.0Tree', subset=F, trials=10, 
fuzzyThreshold=F, trControl=trainControl(method='repeatedcv', number=10, 
repeats=10, classProbs=TRUE, allowParallel=F))
3. Note that I'm using caret-5.15-61, in order to replicate previous results 
from SVM models (see earlier in thread).

I appreciate that this is probably insufficient information to identify the 
issue, but perhaps the traceback below provides a clue. In the meantime, I'll 
try to create a Markdown file which replicates the problem reliably. 

Thanks in advance for any suggestions or advice.



## R version 3.0.2 (2013-09-25)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
## locale:
## [1] en_NZ.UTF-8/en_NZ.UTF-8/en_NZ.UTF-8/C/en_NZ.UTF-8/en_NZ.UTF-8
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## other attached packages:
##  [1] C50_0.1.0-15    e1071_1.6-1     class_7.3-9     caret_5.15-61  
##  [5] reshape2_1.2.2  plyr_1.8        lattice_0.20-24 foreach_1.4.1  
##  [9] cluster_1.14.4  repmis_0.2.6.2  knitr_1.5      
## loaded via a namespace (and not attached):
##  [1] codetools_0.2-8 compiler_3.0.2  digest_0.6.3    evaluate_0.5.1 
##  [5] formatR_0.10    grid_3.0.2      httr_0.2        iterators_1.0.6
##  [9] RCurl_1.95-4.1  stringr_0.6.2   tools_3.0.2

Error that appears when knitting:

 *** caught segfault ***
address 0x1028f7000, cause 'memory not mapped'

 1: .C("C50", as.character(namesString), as.character(dataString),     
as.character(costString), as.logical(control$subset), as.logical(rules),     
as.integer(control$bands), as.integer(trials), as.logical(control$winnow),     
as.double(control$sample), as.integer(control$seed), 
as.integer(control$noGlobalPruning),     as.double(control$CF), 
as.integer(control$minCases), as.logical(control$fuzzyThreshold),     
as.logical(control$earlyStopping), tree = character(1), rules = character(1),   
  output = character(1), PACKAGE = "C50")
 2: C5.0.default(x = trainX, y = trainY, rules = method == "C5.0Rules",     ...)
 3: C5.0(x = trainX, y = trainY, rules = method == "C5.0Rules", ...)
 4: caret:::createModel(data = dat[modelIndex, , drop = FALSE], method = 
method,     tuneValue = info$loop[parm, , drop = FALSE], obsLevels = lev,     
pp = ppp, custom = ctrl$custom$model, ...)
 5: doTryCatch(return(expr), name, parentenv, handler)
 6: tryCatchOne(expr, names, parentenv, handlers[[1L]])
 7: tryCatchList(expr, classes, parentenv, handlers)
 8: tryCatch(expr, error = function(e) {    call <- conditionCall(e)    if 
(!is.null(call)) {        if (identical(call[[1L]], quote(doTryCatch)))         
    call <- sys.call(-4L)        dcall <- deparse(call)[1L]        prefix <- 
paste("Error in", dcall, ": ")        LONG <- 75L        msg <- 
conditionMessage(e)        sm <- strsplit(msg, "\n")[[1L]]        w <- 14L + 
nchar(dcall, type = "w") + nchar(sm[1L], type = "w")        if (is.na(w))       
      w <- 14L + nchar(dcall, type = "b") + nchar(sm[1L],                 type 
= "b")        if (w > LONG)             prefix <- paste0(prefix, "\n  ")    }   
 else prefix <- "Error : "    msg <- paste0(prefix, conditionMessage(e), "\n")  
  .Internal(seterrmessage(msg[1L]))    if (!silent && 
identical(getOption("show.error.messages"),         TRUE)) {        cat(msg, 
file = stderr())        .Internal(printDeferredWarnings())    }    
invisible(structure(msg, class = "try-error", condition = e))})
 9: try(caret:::createModel(data = dat[modelIndex, , drop = FALSE],     method 
= method, tuneValue = info$loop[parm, , drop = FALSE],     obsLevels = lev, pp 
= ppp, custom = ctrl$custom$model, ...),     silent = TRUE)
10: eval(expr, envir, enclos)
11: eval(xpr, envir = envir)
12: doTryCatch(return(expr), name, parentenv, handler)
13: tryCatchOne(expr, names, parentenv, handlers[[1L]])
14: tryCatchList(expr, classes, parentenv, handlers)
15: tryCatch(eval(xpr, envir = envir), error = function(e) e)
16: doTryCatch(return(expr), name, parentenv, handler)
17: tryCatchOne(expr, names, parentenv, handlers[[1L]])
18: tryCatchList(expr, classes, parentenv, handlers)
19: tryCatch({    repeat {        args <- nextElem(it)        if (obj$verbose) 
{            cat(sprintf("evaluation # %d:\n", i))            print(args)       
 }        for (a in names(args)) assign(a, args[[a]], pos = envir,             
inherits = FALSE)        r <- tryCatch(eval(xpr, envir = envir), error = 
function(e) e)        if (obj$verbose) {            cat("result of evaluating 
expression:\n")            print(r)        }        
tryCatch(accumulator(list(r), i), error = function(e) {            cat("error 
calling combine function:\n")            print(e)            NULL        })     
   i <- i + 1    }}, error = function(e) {    if 
(!identical(conditionMessage(e), "StopIteration"))         
stop(simpleError(conditionMessage(e), expr))})
20: e$fun(obj, substitute(ex), parent.frame(), e$data)
21: foreach(iter = seq(along = resampleIndex), .combine = "c", .verbose = 
FALSE,     .packages = "caret", .errorhandling = "stop") %:% foreach(parm = 
1:nrow(info$loop),     .combine = "c", .verbose = FALSE, .packages = "caret", 
.errorhandling = "stop") %op%     {        library(caret)        if 
(ctrl$verboseIter)             caret:::progress(printed[parm, , drop = FALSE], 
names(resampleIndex),                 iter)        if 
(names(resampleIndex)[iter] != "AllData") {            modelIndex <- 
resampleIndex[[iter]]            holdoutIndex <- ctrl$indexOut[[iter]]        } 
       else {            modelIndex <- 1:nrow(dat)            holdoutIndex <- 
modelIndex        }        if (testing)             cat("pre-model\n")        
mod <- try(caret:::createModel(data = dat[modelIndex,             , drop = 
FALSE], method = method, tuneValue = info$loop[parm,             , drop = 
FALSE], obsLevels = lev, pp = ppp, custom = ctrl$custom$model,             
...), silent = TRUE)        if 
(class(mod)[1] != "try-error") {            predicted <- 
try(caret:::predictionFunction(method = method,                 modelFit = 
mod$fit, newdata = dat[holdoutIndex,                   !(names(dat) %in% 
c(".outcome", ".modelWeights")),                   drop = FALSE], preProc = 
mod$preProc, param = info$seqParam[[parm]],                 custom = 
ctrl$custom$prediction), silent = TRUE)            if (class(predicted)[1] == 
"try-error") {                wrn <- paste(colnames(printed[parm, , drop = 
FALSE]),                   printed[parm, , drop = FALSE], sep = "=", collapse = 
", ")                wrn <- paste("predictions failed for ", 
names(resampleIndex)[iter],                   ": ", wrn, sep = "")              
  if (ctrl$verboseIter)                   cat(wrn, "\n")                
warning(wrn)                rm(wrn)                nPred <- 
length(holdoutIndex)                if (!is.null(lev)) {                  
predicted <- rep("", nPred)                  
predicted[seq(along = predicted)] <- NA                }                else {     
             predicted <- rep(NA, nPred)                }                if 
(!is.null(info$seqParam[[parm]])) {                  tmp <- predicted           
       predicted <- vector(mode = "list", length = nrow(info$seqParam[[parm]]) 
+                     1)                  for (i in seq(along = predicted)) 
predicted[[i]] <- tmp                  rm(tmp)                }            }    
    }        else {            wrn <- paste(colnames(printed[parm, , drop = 
FALSE]),                 printed[parm, , drop = FALSE], sep = "=", collapse = 
", ")            wrn <- paste("model fit failed for ", 
names(resampleIndex)[iter],                 ": ", wrn, sep = "")            if 
(ctrl$verboseIter)                 cat(wrn, "\n")            warning(wrn)       
     rm(wrn)            nPred <- length(holdoutIndex)            if 
(!is.null(lev)) {                predicted <- rep("", nPred)                
predicted[seq(along = predicted)] <- NA            }            else {          
 predicted <- rep(NA, nPred)            }            if 
(!is.null(info$seqParam[[parm]])) {                tmp <- predicted             
   predicted <- vector(mode = "list", length = nrow(info$seqParam[[parm]]) +    
               1)                for (i in seq(along = predicted)) 
predicted[[i]] <- tmp                rm(tmp)            }        }        if 
(testing)             print(head(predicted))        if (ctrl$classProbs) {      
      if (class(mod)[1] != "try-error") {                probValues <- 
caret:::probFunction(method = method,                   modelFit = mod$fit, 
newdata = dat[holdoutIndex,                     !(names(dat) %in% c(".outcome", 
".modelWeights")),                     drop = FALSE], preProc = mod$preProc, 
param = info$seqParam[[parm]],                   custom = 
ctrl$custom$probability)            }            else {                
probValues <- as.data.frame(matrix(NA, nrow = nPred,                   ncol = 
length(lev)))                colnames(probValues) <- lev                if 
(!is.null(info$seqParam[[parm]]))
 {                  tmp <- probValues                  probValues <- 
vector(mode = "list", length = nrow(info$seqParam[[parm]]) +                    
 1)                  for (i in seq(along = probValues)) probValues[[i]] <- tmp  
                rm(tmp)                }            }            if (testing)   
              print(head(probValues))        }        if (!is.null(info$seq)) { 
           allParam <- caret:::expandParameters(info$loop[parm,                 
, drop = FALSE], info$seqParam[[parm]])            if (method == "ctree")       
          allParam <- allParam[!duplicated(allParam), ,                   drop 
= FALSE]            if (method == "glmnet")                 allParam <- 
allParam[complete.cases(allParam),                   , drop = FALSE]            
predicted <- lapply(predicted, function(x, y, lv) {                if 
(!is.factor(x) & is.character(x))                   x <- 
factor(as.character(x), levels = lv)                data.frame(pred = x, obs = 
y, stringsAsFactors = FALSE)            }, y = dat$.outcome[holdoutIndex], lv = 
lev)            if (testing)                 print(head(predicted))            
if (ctrl$classProbs) {                for (k in seq(along = predicted)) 
predicted[[k]] <- cbind(predicted[[k]],                   probValues[[k]])      
      }            if (ctrl$savePredictions) {                tmpPred <- 
predicted                for (modIndex in seq(along = tmpPred)) {               
   tmpPred[[modIndex]]$rowIndex <- holdoutIndex                  
tmpPred[[modIndex]] <- cbind(tmpPred[[modIndex]],                     
allParam[modIndex, , drop = FALSE])                }                tmpPred <- 
rbind.fill(tmpPred)                tmpPred$Resample <- 
names(resampleIndex)[iter]            }            else tmpPred <- NULL         
   thisResample <- lapply(predicted, ctrl$summaryFunction,                 lev 
= lev, model = method)            if (testing)                 
print(head(thisResample))             if (length(lev) > 1) {                cells <- 
lapply(predicted, function(x) caret:::flatTable(x$pred,                   x$obs))                for 
(ind in seq(along = cells)) thisResample[[ind]] <- c(thisResample[[ind]],       
            cells[[ind]])            }            thisResample <- 
do.call("rbind", thisResample)            thisResample <- cbind(allParam, 
thisResample)        }        else {            if (is.factor(dat$.outcome))    
             predicted <- factor(as.character(predicted),                   
levels = lev)            tmp <- data.frame(pred = predicted, obs = 
dat$.outcome[holdoutIndex],                 stringsAsFactors = FALSE)           
 names(tmp)[1] <- "pred"            if (ctrl$classProbs)                 tmp <- 
cbind(tmp, probValues)            if (ctrl$savePredictions) {                
tmpPred <- tmp                tmpPred$rowIndex <- holdoutIndex                
tmpPred <- cbind(tmpPred, info$loop[parm, , drop = FALSE])                
tmpPred$Resample <- names(resampleIndex)[iter]            }            else 
tmpPred <- NULL            thisResample <- ctrl$summaryFunction(tmp, lev = lev,  
               model = method)            if (length(lev) > 1)                 
thisResample <- c(thisResample, caret:::flatTable(tmp$pred,                   
tmp$obs))            thisResample <- as.data.frame(t(thisResample))            
thisResample <- cbind(thisResample, info$loop[parm,                 , drop = 
FALSE])        }        thisResample$Resample <- names(resampleIndex)[iter]     
   if (ctrl$verboseIter)             caret:::progress(printed[parm, , drop = 
FALSE], names(resampleIndex),                 iter, FALSE)        
list(resamples = thisResample, pred = tmpPred)    }
22: nominalTrainWorkflow(dat = trainData, info = trainInfo, method = method,    
 ppOpts = preProcess, ctrl = trControl, lev = classLevels,     ...)
23: train.default(syllm[, syllvars], syllm$Band, method = "C5.0Tree",     
subset = F, trials = 10, fuzzyThreshold = F, trControl = trainControl(method = 
"repeatedcv",         number = 10, repeats = 10, classProbs = TRUE, 
summaryFunction = multiClassSummary,         allowParallel = F))
24: train(syllm[, syllvars], syllm$Band, method = "C5.0Tree", subset = F,     
trials = 10, fuzzyThreshold = F, trControl = trainControl(method = 
"repeatedcv",         number = 10, repeats = 10, classProbs = TRUE, 
summaryFunction = multiClassSummary,         allowParallel = F))
25: eval(expr, envir, enclos)
26: eval(call, envir, enclos)
27: withVisible(eval(call, envir, enclos))
28: withCallingHandlers(withVisible(eval(call, envir, enclos)), warning = 
wHandler,     error = eHandler, message = mHandler)
29: doTryCatch(return(expr), name, parentenv, handler)
30: tryCatchOne(expr, names, parentenv, handlers[[1L]])
31: tryCatchList(expr, classes, parentenv, handlers)
32: tryCatch(expr, error = function(e) {    call <- conditionCall(e)    if 
(!is.null(call)) {        if (identical(call[[1L]], quote(doTryCatch)))         
    call <- sys.call(-4L)        dcall <- deparse(call)[1L]        prefix <- 
paste("Error in", dcall, ": ")        LONG <- 75L        msg <- 
conditionMessage(e)        sm <- strsplit(msg, "\n")[[1L]]        w <- 14L + 
nchar(dcall, type = "w") + nchar(sm[1L], type = "w")        if (is.na(w))       
      w <- 14L + nchar(dcall, type = "b") + nchar(sm[1L],                 type 
= "b")        if (w > LONG)             prefix <- paste0(prefix, "\n  ")    }   
 else prefix <- "Error : "    msg <- paste0(prefix, conditionMessage(e), "\n")  
  .Internal(seterrmessage(msg[1L]))    if (!silent && 
identical(getOption("show.error.messages"),         TRUE)) {        cat(msg, 
file = stderr())        .Internal(printDeferredWarnings())    }    
invisible(structure(msg, class = "try-error", condition = e))})
33: try(f, silent = TRUE)
34: handle(ev <- withCallingHandlers(withVisible(eval(call, envir,     
enclos)), warning = wHandler, error = eHandler, message = mHandler))
35: evaluate_call(expr, parsed$src[[i]], envir = envir, enclos = enclos,     
debug = debug, last = i == length(out), use_try = stop_on_error !=         2L, 
keep_warning = keep_warning, keep_message = keep_message,     output_handler = output_handler)
36: evaluate(code, envir = env, new_device = FALSE, keep_warning = 
!isFALSE(options$warning),     keep_message = !isFALSE(options$message), 
stop_on_error = if (options$error &&         options$include) 0L else 2L)
37: in_dir(opts_knit$get("root.dir") %n% input_dir(), evaluate(code,     envir 
= env, new_device = FALSE, keep_warning = !isFALSE(options$warning),     
keep_message = !isFALSE(options$message), stop_on_error = if (options$error &&  
       options$include) 0L else 2L))
38: block_exec(params)
39: call_block(x)
40: process_group.block(group)
41: process_group(group)
42: withCallingHandlers(if (tangle) process_tangle(group) else 
process_group(group),     error = function(e) {        cat(res, sep = "\n", 
file = output %n% "")        message("Quitting from lines ", 
paste(current_lines(i),             collapse = "-"), " (", 
knit_concord$get("infile"),             ") ")    })
43: process_file(text, output)
44: knit("compareclassifiers3.Rmd", encoding = "UTF-8")
aborting ...

On 18/11/2013, at 14:42 , Max Kuhn <mxk...@gmail.com> wrote:

> Andrew,
>> What I still don't quite understand is which accuracy values from train() I 
>> should trust: those using classProbs=T or classProbs=F?
> It depends on whether you need the class probabilities and class
> predictions to match (which they would if classProbs = TRUE).
> Another option is to use a model where this discrepancy does not exist.
>> train often crashes with 'memory map' errors!)?
> I've never seen that. You should describe it more.
> Max

