I've attached data.restore4.txt, containing the function data.restore4(), which has the same argument list as foreign::data.restore() and is mean to be called by the latter if the first line of the file is "## Dump S Version 4 Dump". It can read version 4 of the 'S data dump' format, which for which S+ uses the file extension ".sdd". It stores the objects it reads in the environment specified by the 'env' argument/.
I think it works pretty well; please report any issues to me. Bill Dunlap TIBCO Software wdunlap tibco.com On Mon, Apr 17, 2017 at 3:20 PM, Daniel Molinari <d.a.molin...@gmail.com> wrote: > Hi all, > > I have several data files provided in mtw format (Minitab) and sdd format > (S-Plus) and I need to read them in R. > > I do not have access either to Minitab or to S-Plus. > > How can I accomplish this task ? > > Thank you, > Daniel > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.
data.restore4 <- function(file, print = FALSE, verbose = FALSE, env = .GlobalEnv) { # Like foreign::data.restore, but for S Version 4 data.dump format # TODO: when creating functions within functions or expressions, make the inner # ones calls to function(), not already-created functions. Splus does # not have lexical scoping so this should not affect behavior, but makes # the new function more R-like. # Dumping the function to a file and sourcing it back in would have the same # effect. # TODO: deal with stored Splus objects that have an implicit class # but no "class" attribute. Except for "matrix" and "array" I don't # think Splus creates such objects, but they exist in the 'data' package # and they depend on getOldClass() to map the class field in the data.dump # to a class vector in the object. E.g., get("wafer", where="data") has # class 'design' which should become attribute class=c("design", "data.frame"). # Some 'ordered' objects are analogous - no class attribute and class field is 'ordered' so you # have to know that 'ordered' means class=c("ordered","factor"). # "factor" and "ordered" may also be stored without a named ".Label" attribute. # (I have dealt with the factor the matrix/array objects are commonly stored without # named attributes - I assume that the structure has length 3 and the attributes # are ".Dims" and ".Dimnames".) origFile <- file if (!inherits(file, "connection")) { file <- file(file, "r") on.exit(close(file)) } lineNo <- 0 nextLine <- function(n = 1) { lineNo <<- lineNo + n readLines(file, n = n) } Verbosely <- function(...) { if (verbose) { message(simpleMessage(paste("(object ", objName, ", line ", lineNo, ") ", paste(..., collapse = " ", sep = ""), sep = ""), sys.call(-1))) } } Stop <- function(...) { stop(simpleError(paste(paste(..., collapse = " ", sep = ""), sep = "", " (object ", objName, ", file ", deparse(summary(file)$description), ", line ", lineNo, ")"), sys.call(-1))) } Recurse <- function(length) { # Never call 'blah <- .data.restore4()' directly as it may return a missing # argument object, which will break '<-' but not lapply. lapply(seq_len(length), function(i) { .data.restore4() }) } constructMissingArgument <- function() formals(function(x)NULL)$x txt <- nextLine() objName <- "<none yet>" if (length(txt) != 1) { Stop("File is empty") } if (txt != "## Dump S Version 4 Dump ##") { Stop("File does not start with '## Dump S Version 4 Dump', so this is not a SV4 data.dump file") } .data.restore4 <- function() { class <- nextLine() mode <- nextLine() length <- as.numeric(tmp <- nextLine()) if (is.na(length) || length%%1 != 0 || length < 0) { Stop("Expected nonnegative integer 'length' at line ", lineNo, " but got ", deparse(tmp)) } if (mode == "character") { ret <- nextLine(length) # convert \\n to newline, \\t to tab, etc. by using parse() vapply(ret, function(string)parse(text=paste0("\"", string, "\""))[[1]], FUN.VALUE="", USE.NAMES=FALSE) } else if (mode == "logical") { txt <- nextLine(length) lglVector <- rep(NA, length) lglVector[txt != "N"] <- as.logical(as.integer(txt[txt != "N"])) lglVector } else if (mode %in% c("integer", "single", "numeric")) { txt <- nextLine(length) txt[txt == "M"] <- "NaN" txt[txt == "I"] <- "Inf" txt[txt == "J"] <- "-Inf" if (mode == "single") { mode <- "numeric" } atomicVector <- rep(as(NA, mode), length) atomicVector[txt != "N"] <- as(txt[txt != "N"], mode) atomicVector } else if (mode == "complex") { txt <- nextLine(length) txt <- gsub("M", "NaN", txt) txt <- gsub("\\<I\\>", "Inf", txt) txt <- gsub("\\<J\\>", "-Inf", txt) atomicVector <- rep(as(NA, mode), length) atomicVector[txt != "N"] <- as(txt[txt != "N"], mode) atomicVector } else if (mode == "list") { vectors <- Recurse(length) vectors } else if (mode == "NULL") { NULL } else if (mode == "structure") { vectors <- Recurse(length) if (class == ".named_I" || class == "named") { if (length != 2) { Stop("expected length of '.named_I' component is 2, but got ", length) } else if (!is.character(vectors[[2]])) { Stop("expected second component of '.named_I' to be character, but got ", deparse(mode(vectors[[2]]))) } vector <- vectors[[1]] names <- vectors[[2]] if (is.call(vector) && identical(vector[[1]], as.name("for"))) { if (length(names) != 3 || !all(names[2:3] == "")) { Stop("expected only first entry of 'names' for 'for' to be non-blank, but got ", deparse(names)) } vector[[2]] <- as.name(names[1]) vector } else if (is.call(vector) && identical(vector[[1]], as.name(".Call"))) { if (length(vector) - 1 != length(names)) { Stop("expected lengths of names and .Call to be the same, but got ", length(vector) - 1, " and ", length(names)) } vector[[2]] <- names[1] names[1] <- "" if (any(names != "")) { names(vector) <- c("", names) } vector } else if (is.call(vector) && identical(vector[[1]], as.name(".Internal"))) { if (length(vector) - 1 != length(names)) { Stop("expected lengths of names and .Internal to be the same, but got ", length(vector) - 1, " and ", length(names)) } Verbosely("Splus call to '.Internal' will not work in R (or TERR)\n") vector[[3]] <- names[2] vector } else if (is.call(vector) && identical(vector[[1]], as.name("function"))) { if (length(vector) - 1 != length(names)) { Stop("expected lengths of argument names and function to be the same, but got ", length(vector) - 1, " and ", length(names)) } func <- function()NULL formals(func) <- as.pairlist( structure(as.list(vector)[-c(1,length(vector))], names=names[-length(names)]) ) body(func) <- vector[[length(vector)]] environment(func) <- env func } else if (is.call(vector) && identical(vector[[1]], as.name("return"))) { # In Splus, names are added to return(x,y) when return has more than one argument Verbosely("Multi-argument returns will fail in R (or TERR): changing return(...) to return(list(...))\n") if (length(vector)-1 != length(names)) { Stop("expected number of returned items length of their name to be the same, but got ", length(vector)-1, " and ", length(names)) } if (any(names != "")) { names(vector) <- c("", names) } vector[[1]] <- as.name("list") call("return", vector) } else { # finally, attributes if (length(vector) != length(names)) { Stop("expected lengths of '.named_I' components to be the same, but got ", length(vector), " and ", length(names)) } names(vector) <- names if (identical(names[1], ".Data")) { # a hack - really want to know if vector had mode "structure" or not if (".Tsp" %in% names) { # ancient Splus objects have dates in .Tsp rounded to 6 significant digits i <- which(".Tsp" == names) if (length(i) != 1) { Stop("Multiple '.Tsp' attributes on object") } tsp <- vector[[i]] if (length(tsp) != 3 || !is.numeric(tsp)) { Stop("'.Tsp' attribute should contain 3 numbers, but got ", deparse(tsp)) } n <- round( (tsp[2] - tsp[1]) * tsp[3] + 1) vector[[i]] <- c(tsp[1], tsp[1] + (n-1) / tsp[3], tsp[3]) if ( abs(tsp[2] - vector[[i]][2])/abs(tsp[2]) > 1e-8 ) { Verbosely("Fixed up rounded '.Tsp' from ", deparse(tsp), " to ", deparse(vector[[i]])) } } do.call(structure, vector, quote = TRUE) } else { vector } } } else if (class %in% c("matrix", "array")) { if (length != 3) { Stop("Expected 'matrix' or 'array' structures to have length 3, but got ", length) } array(vectors[[1]], dim=vectors[[2]], dimnames=vectors[[3]]) } else { vectors # TODO: this is ok within a .Named_I/structure object, but otherwise means we omitted a known class (like 'factor' or 'ordered') } } else if (mode == "name") { if (length != 1) { Stop("expected length of 'name' objects is 1, but got", length) } name <- as.name(nextLine()) # NULL is the NULL object itself in R, but a name bound to it in Splus if (identical(name, as.name("NULL"))) { NULL } else { name } } else if (mode == "call") { callList <- Recurse(length) as.call(callList) } else if (mode == "expression") { exprList <- Recurse(length) as.expression(exprList) } else if (mode %in% c("<-", "=", "<<-", "if", "{", "while", "repeat", "break", "next", "return")) { if (mode == "<<-") { Verbosely("The '<<-' operator acts differently in R (or TERR) and Splus") } as.call(c(list(as.name(mode)), Recurse(length))) } else if (mode == "for") { # Splus: list(loopVar = NULL, quote(sequenceCall), quote(bodyCall)) # R: list(as.name("for"), as.name("loopVar"), quote(sequenceCall), quote(bodyCall)) # In Splus, the loopVar is a name for the list, which gets added later by .named_I as.call(c(list(as.name(mode)), Recurse(length))) } else if (mode == "function") { # As with "for", this will be further processed by .named_I (if it has any arguments) if (length > 1) { as.call(c(list(as.name(mode)), Recurse(length))) } else { func <- function()NULL # body(func) <- .data.restore4() body(func) <- Recurse(length)[[1]] environment(func) <- env func } } else if (mode == ".Call") { # again, must finish processing via .named_I (the C function name will be in names(call)) as.call(c(list(as.name(mode)), Recurse(length))) } else if (mode == "internal") { # again, must finish processing via .named_I (the C function name will be in names(call)) as.call(c(list(as.name(".Internal")), Recurse(length))) } else if (mode == "missing") { constructMissingArgument() } else if (mode == "call with ...") { if (length != 1) { Stop("Expected length of 'call with ...' item to be 1, but it was ", length) } # call <- .data.restore4() call <- Recurse(length)[[1]] if (!is.call(call)) { Stop("Expected child to 'call with ...' to be a call, but it is a ", mode(call), "\n") } call } else if (mode == "comment expression") { if (length != 2) { Stop("Expected length of 'comment expression' is 2, but it was ", length) } commExprList <- Recurse(length) if (!is.character(commExprList[[1]])) { Stop("Expected first component of 'comment expression' to be character, but it was ", mode(commExprList[[1]])) } commExprList[[2]] } else if (mode == "(") { callExpr <- Recurse(length) as.call(callExpr) } else { # What else did I miss? Stop("Unimplemented mode: ", deparse(mode)) } } while (length(objName <- nextLine()) == 1) { if (print) { cat(deparse(objName), ":\n", sep="") } Verbosely("Starting to read\n") obj <- .data.restore4() Verbosely(" class=", deparse(class(obj)), ", size=", object.size(obj), "\n") assign(objName, obj, envir=env) if (print) { cat(" ", class(obj), "\n", sep="") } } origFile }
______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.