[R] stop on rows where !is.na(mydata$ti_all)
Dear R experts, I got help to build a loop, but there is a bug inside it that causes one part of the mechanism to fail. It should grow once, but it keeps growing on rows where $ti_all is not NA. Here is a wall of code that very crudely demonstrates the problem; there are a couple of dim() outputs at the end where you can see how, the second time around, it keeps adding (2) rows, but this does not happen to row 2. I'm aware this is part of another function, but I can't figure that out. The first thing that happens is correct: 7 rows are added. Any help or guidance would be appreciated. Thanks, Eric lookup - structure(list(c_name = c(1L, 2L, 4L, 5L, 6L, 7L), t_name = structure(1:6, .Label = c(Bob, Julian, Mitt, Ricky, Tom, Victor), class = factor)), .Names = c(c_name, t_name), class = data.frame, row.names = c(1, 2, 3, 4, 5, 6)) mydata - structure(list(id = c(1L, 1L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 6L, 7L, 7L, 7L, 7L, 8L, 9L), time = c(intake_arm_1, v_001_arm_1, intake_arm_1, intake_arm_1, intake_arm_1, v_001_arm_1, v_002_arm_1, v_003_arm_1, v_004_arm_1, v_005_arm_1, v_006_arm_1, v_007_arm_1, intake_arm_1, v_001_arm_1, intake_arm_1, intake_arm_1, v_011_arm_1, v_012_arm_1, v_013_arm_1, intake_arm_1, intake_arm_1), dat_all = c(NA, NA, NA, NA, NA, NA, NA, 2012-09-23, 2012-09-23, 2012-09-02, 2012-09-10, 2012-09-23, NA, NA, NA, NA, 2012-09-23, 2012-09-23, 2012-09-23, NA, NA), ti_all = c(NA, NA, NA, NA, NA, NA, NA, 6L, 44L, 33L, NA, 22L, NA, NA, NA, NA, 65L, NA, 10L, NA, NA), ty_all = c(NA, NA, NA, NA, NA, NA, NA, out_, out_, cma_, NA, cma_, NA, NA, NA, NA, out_, out_, out_, NA, NA), out_c = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), cma_c = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), c_n = c(NA, 1L, NA, NA, NA, NA, NA, 7L, 4L, 7L, NA, 1L, NA, 2L, NA, NA, 7L, 7L, 7L, NA, NA), t_name = c(Tom, NA, Ricky, Ricky, Victor, NA, NA, NA, NA, NA, NA, NA, Julian, NA, Julian, 
Bob, NA, NA, NA, Mitt, Mitt)), .Names = c(id, time, dat_all, ti_all, ty_all, out_c, cma_c, c_n, t_name), class = data.frame, row.names = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) if(require(plyr)){ print(plyr is loaded correctly) } else { print(trying to install plyr) install.packages('plyr') if(require(plyr)){ print(plyr installed and loaded) } else { stop(could not install plyr) } } newrows - ddply(mydata, .(id), function(subdata) { subdata_ty = subdata[!is.na(subdata$ty_all), ] if (NROW(subdata) == 1) { r = subdata[1, ] c(v_001_arm_1, NA, NA, NA, NA, NA, lookup$c_name[lookup$t_name == r$t_name], NA) } else if (NROW(subdata_ty) 0) { numbers = sapply(strsplit(subdata$time, _), function(l) ifelse(l[1] != intake, as.numeric(l[2]), 0)) newname = paste(c(v, sprintf(%03d, max(numbers) + 1), arm, 1), collapse=_) r1 = subdata[1, ] new_c_n = lookup$c_name[lookup$t_name == r1$t_name] new_out_c = sum(subdata$ty_all == out_ !is.na(subdata$ti_all)) new_cma_c = sum(subdata$ty_all == cma_ !is.na(subdata$ti_all)) new_out_c = ifelse(new_out_c == 0, NA, new_out_c) new_cma_c = ifelse(new_cma_c == 0, NA, new_cma_c) return(c(newname, NA, NA, NA, new_out_c, new_cma_c, new_c_n, NA)) } }) # recombine and sort colnames(newrows) = colnames(mydata) newdata = rbind(mydata, newrows) newdata = newdata[order(newdata$id), ] mydata2 - newdata newrows2 - ddply(mydata2, .(id), function(subdata) { subdata_ty = subdata[!is.na(subdata$ty_all), ] if (NROW(subdata) == 1) { r = subdata[1, ] c(v_001_arm_1, NA, NA, NA, NA, NA, lookup$c_name[lookup$t_name == r$t_name], NA) } else if (NROW(subdata_ty) 0) { numbers = sapply(strsplit(subdata$time, _), function(l) ifelse(l[1] != intake, as.numeric(l[2]), 0)) newname = paste(c(v, sprintf(%03d, max(numbers) + 1), arm, 1), collapse=_) r1 = subdata[1, ] new_c_n = lookup$c_name[lookup$t_name == r1$t_name] new_out_c = sum(subdata$ty_all == out_ !is.na(subdata$ti_all)) new_cma_c = sum(subdata$ty_all == cma_ !is.na(subdata$ti_all)) 
new_out_c = ifelse(new_out_c == 0, NA, new_out_c) new_cma_c = ifelse(new_cma_c == 0, NA, new_cma_c) return(c(newname, NA, NA, NA, new_out_c, new_cma_c, new_c_n, NA)) } }) # recombine and sort colnames(newrows2) = colnames(mydata2) newdata2 = rbind(mydata2, newrows2) newdata2 = newdata2[order(newdata2$id), ] mydata3 - newdata2 newrows2 - ddply(mydata3, .(id), function(subdata) { subdata_ty = subdata[!is.na(subdata$ty_all), ] if (NROW(subdata) == 1) { r = subdata[1, ] c(v_001_arm_1, NA, NA, NA, NA, NA, lookup$c_name[lookup$t_name ==
Re: [R] Parsing back to API structure
Problem solved by Josh O'Brien on stackoverflow, http://stackoverflow.com/questions/12393004/parsing-back-to-messy-api-strcuture/12435389#12435389 some_magic - function(df) { ## Replace NA with , converting column types as needed df[] - lapply(df, function(X) { if(any(is.na(X))) {X[is.na(X)] - ; X} else {X} }) ## Print integers in first column as 2-digit character strings ## (DO NOTE: Hardwiring the number of printed digits here is probably ## inadvisable, though needed to _exactly_ reconstitute RAW.API.) df[[1]] - sprintf(%02.0f, df[[1]]) ## Separately build header and table body, then suture them together l1 - paste(names(df), collapse=,) l2 - capture.output(write.table(df, sep=,, col.names=FALSE, row.names=FALSE)) out - paste0(c(l1, l2, ), collapse=\n) ## Reattach attributes att - list(`Content-Type` = structure(c(text/html, utf-8), .Names = c(, charset))) attributes(out) - att out } identical(some_magic(df), RAW.API) # [1] TRUE On Thu, Sep 13, 2012 at 11:32 AM, Eric Fail eric.f...@gmx.us wrote: Dear Jim, Thank you for your response I appreciate your effort! It is close, I must admit that. What I am looking for is an object that is identical to 'RAW.API,' or at least in the stricture (I guess i do not need the ,`Content-Type` = structure(c(text/html, utf-8), .Names = c(, charset))) part. When I investigate 'x.out' it also have the NA's. I've tried to fix it, but I had to give up. It is strange because getting there seems so easy (warning false logic!). 
Here is what I got on my looong and alternative route in the hope that someone on the list might be able to help RAW.API - structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n, `Content-Type` = structure(c(text/html, utf-8), .Names = c(,charset))) # I used an alternative way of converting it to a dataset to keep the leading 0 in the id variables x - read.table(file = textConnection(RAW.API ), header = TRUE, sep = ,, na.strings = , stringsAsFactors = FALSE, colClasses =character) x # now put it back into the same string; write.csv does quote alphanumerics write.csv(x, textConnection('output', 'w'), row.names = FALSE) unlockBinding(output, env = .GlobalEnv) # fixes the problem with the header output[1] - gsub(\\\, , output[1]) # removes NAs output - gsub(NA, \\, output) # removes \ at the beginning of each line output - gsub(^\\\, , output) # removes an at the end of each line output - gsub(\\\$, , output) # same as before x.out - paste(output, collapse = '\n\') # adds an line break at the end x.out - gsub($, \n, x.out) # so much manual gsub ... Any help would be very much appreciated. 
On Wed, Sep 12, 2012 at 5:54 PM, jim holtman jholt...@gmail.com wrote: This is close, but it does quote the header names, but does produce the same dataframe when read back in: RAW.API - structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n, `Content-Type` = structure(c(text/html, utf-8), .Names = c(, charset))) x - read.csv(textConnection(RAW.API), as.is = TRUE) x id event_arm namedob pushed_text pushed_calc complete 1 1 event_1_arm_1 John 1979-05-01 NA2 2 1 event_2_arm_1 John 2012-09-02 abc 1231 3 1 event_3_arm_1 John 2012-09-10 NA2 4 2 event_1_arm_1 Mary 1951-09-10 def 4562 5 2 event_2_arm_1 Mary 1978-09-12 NA2 # now put it back into the same string; write.csv does quote alphanumerics write.csv(x, textConnection('output', 'w'), row.names = FALSE) x.out - paste(output, collapse = '\n') # read it back in to show it is the same x.in - read.csv(textConnection(x.out), as.is = TRUE) x.in id event_arm namedob pushed_text pushed_calc complete 1 1 event_1_arm_1 John 1979-05-01 NA2 2 1 event_2_arm_1 John 2012-09-02 abc 1231 3 1 event_3_arm_1 John 2012-09-10 NA2 4 2 event_1_arm_1 Mary 1951-09-10 def 4562 5 2 event_2_arm_1 Mary 1978-09-12 NA2 On Wed, Sep 12, 2012 at 8:21 PM, Eric Fail eric.f...@gmx.us wrote: Dear R experts, I'm reading data from an online database via API and it gets delivered in this messy
Re: [R] Parsing back to API structure
Dear Jim, Thank you for your response I appreciate your effort! It is close, I must admit that. What I am looking for is an object that is identical to 'RAW.API,' or at least in the stricture (I guess i do not need the ,`Content-Type` = structure(c(text/html, utf-8), .Names = c(, charset))) part. When I investigate 'x.out' it also have the NA's. I've tried to fix it, but I had to give up. It is strange because getting there seems so easy (warning false logic!). Here is what I got on my looong and alternative route in the hope that someone on the list might be able to help RAW.API - structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n, `Content-Type` = structure(c(text/html, utf-8), .Names = c(,charset))) # I used an alternative way of converting it to a dataset to keep the leading 0 in the id variables x - read.table(file = textConnection(RAW.API ), header = TRUE, sep = ,, na.strings = , stringsAsFactors = FALSE, colClasses =character) x # now put it back into the same string; write.csv does quote alphanumerics write.csv(x, textConnection('output', 'w'), row.names = FALSE) unlockBinding(output, env = .GlobalEnv) # fixes the problem with the header output[1] - gsub(\\\, , output[1]) # removes NAs output - gsub(NA, \\, output) # removes \ at the beginning of each line output - gsub(^\\\, , output) # removes an at the end of each line output - gsub(\\\$, , output) # same as before x.out - paste(output, collapse = '\n\') # adds an line break at the end x.out - gsub($, \n, x.out) # so much manual gsub ... Any help would be very much appreciated. 
On Wed, Sep 12, 2012 at 5:54 PM, jim holtman jholt...@gmail.com wrote: This is close, but it does quote the header names, but does produce the same dataframe when read back in: RAW.API - structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n, `Content-Type` = structure(c(text/html, utf-8), .Names = c(, charset))) x - read.csv(textConnection(RAW.API), as.is = TRUE) x id event_arm namedob pushed_text pushed_calc complete 1 1 event_1_arm_1 John 1979-05-01 NA2 2 1 event_2_arm_1 John 2012-09-02 abc 1231 3 1 event_3_arm_1 John 2012-09-10 NA2 4 2 event_1_arm_1 Mary 1951-09-10 def 4562 5 2 event_2_arm_1 Mary 1978-09-12 NA2 # now put it back into the same string; write.csv does quote alphanumerics write.csv(x, textConnection('output', 'w'), row.names = FALSE) x.out - paste(output, collapse = '\n') # read it back in to show it is the same x.in - read.csv(textConnection(x.out), as.is = TRUE) x.in id event_arm namedob pushed_text pushed_calc complete 1 1 event_1_arm_1 John 1979-05-01 NA2 2 1 event_2_arm_1 John 2012-09-02 abc 1231 3 1 event_3_arm_1 John 2012-09-10 NA2 4 2 event_1_arm_1 Mary 1951-09-10 def 4562 5 2 event_2_arm_1 Mary 1978-09-12 NA2 On Wed, Sep 12, 2012 at 8:21 PM, Eric Fail eric.f...@gmx.us wrote: Dear R experts, I'm reading data from an online database via API and it gets delivered in this messy comma separated structure, RAW.API - structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n, `Content-Type` = 
structure(c(text/html, utf-8), .Names = c(, charset))) I have this script that nicely parses it into a data frame, (df - read.table(file = textConnection(RAW.API), header = TRUE, sep = ,, na.strings = , stringsAsFactors = FALSE)) id event_arm namedob pushed_text pushed_calc complete 1 1 event_1_arm_1 John 1979-05-01NA NA2 2 1 event_2_arm_1 John 2012-09-02 abc 1231 3 1 event_3_arm_1 John 2012-09-10NA NA2 4 2 event_1_arm_1 Mary 1951-09-10 def 4562 5 2 event_2_arm_1 Mary 1978-09-12NA NA2 I then do some calculations and write them to pushed_text and pushed_calc whereafter I need to format the data back to the messy comma separated structure
[R] Parsing back to API structure
Dear R experts, I'm reading data from an online database via API and it gets delivered in this messy comma-separated structure: RAW.API <- structure("id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\"01\",\"event_1_arm_1\",\"John\",\"1979-05-01\",\"\",\"\",2\n\"01\",\"event_2_arm_1\",\"John\",\"2012-09-02\",\"abc\",\"123\",1\n\"01\",\"event_3_arm_1\",\"John\",\"2012-09-10\",\"\",\"\",2\n\"02\",\"event_1_arm_1\",\"Mary\",\"1951-09-10\",\"def\",\"456\",2\n\"02\",\"event_2_arm_1\",\"Mary\",\"1978-09-12\",\"\",\"\",2\n", `Content-Type` = structure(c("text/html", "utf-8"), .Names = c("", "charset"))) I have this script that nicely parses it into a data frame: (df <- read.table(file = textConnection(RAW.API), header = TRUE, sep = ",", na.strings = "", stringsAsFactors = FALSE)) id event_arm name dob pushed_text pushed_calc complete 1 1 event_1_arm_1 John 1979-05-01 NA NA 2 2 1 event_2_arm_1 John 2012-09-02 abc 123 1 3 1 event_3_arm_1 John 2012-09-10 NA NA 2 4 2 event_1_arm_1 Mary 1951-09-10 def 456 2 5 2 event_2_arm_1 Mary 1978-09-12 NA NA 2 I then do some calculations and write them to pushed_text and pushed_calc, after which I need to format the data back to the messy comma-separated structure it came in. I imagine something like this: API.back <- `some magic command`(df, ...) identical(RAW.API, API.back) [1] TRUE That is, some command that can format my data from the data frame I made, df, back to the structure that the raw API object came in, RAW.API. Any help would be appreciated. Thanks for reading. Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] subtract a list of vectors from a list of data.frames in an elegant way
a working solution to the problem, a - DBquery[names(lookup)] mother.of.lookup - list() for(string in names(a)) { a[[string]] - names(a[[string]]) mother.of.lookup[[string]] - setdiff(a[[string]], lookup[[string]]) } identical(mother.of.lookup, result) It might not be the most elegant solution, but it works. Best, Eric On Thu, Mar 29, 2012 at 4:07 AM, Jim Holtman jholt...@gmail.com wrote: ?setdiff Sent from my iPad On Mar 29, 2012, at 4:28, Eric Fail eric.f...@gmx.us wrote: Dear R experts, I've realized that it might not be possible to define a negative SELCET statement in a SQL call so now I'm looking for the smoothest way to generate a list of what I would like from my large database by first pulling all the names with a query like this SELECT top 1 * FROM your_table (thank you Bart Joosen for the idea) and then subtract the variables I am not allow to pull manually ending up with a 'positive' definition of what I want, something I can use in a SQL SELCT statement (see my email on this list from yesterday for more on that). When I query the database for the variable names I get something similar to 'DBquery' in my working example below, but considerable longer with over 2400 hundred variables. As I only need to remove two or three variables I would like to define a lookup table (like the list 'lookup' in my example) and subtract that from my data base query. Now to my question. Is there a way I can subtract one list from another? Like setoff or alike? I would like to end up with a list like the one shown in my example called 'result.' In short, I would like to subtract 'lookup' from 'DBquery' and end up with 'result,' please note that 'result' is a list fo vecktors and not a list of dataframes. In my real life example DBquery is considerable longer so defining that by hand would make a really really long syntax. Hope someone know some smart function that I can use to solve my problem in an elegant way. Thanks for reading. 
Erick ## begin R code ## DBquery - list(tableA=data.frame(id = numeric(0), atwin = numeric(0), atrout = numeric(0)), tableB=data.frame(id = numeric(0), mq = numeric(0), z = numeric(0), m = numeric(0)), tableC=data.frame(V1 = numeric(0), mfn = numeric(0), iiff = numeric(0)), tableD=data.frame(id = numeric(0), msf = numeric(0), oom = numeric(0))) lookup - list(tableA= c('atwin', 'atrout'), tableB= c('m', 'z'), tableC= 'ALL') ### ... result - list(tableA= c('id'), tableB= c('id', 'mq'), tableC= c('V1', 'mfn', 'iiff')) __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] subtract a list of vectors from a list of data.frames in an elegant way
Dear R experts, I've realized that it might not be possible to define a negative SELECT statement in a SQL call, so now I'm looking for the smoothest way to generate a list of what I would like from my large database by first pulling all the names with a query like this: SELECT top 1 * FROM your_table (thank you Bart Joosen for the idea) and then subtracting the variables I am not allowed to pull manually, ending up with a 'positive' definition of what I want, something I can use in a SQL SELECT statement (see my email on this list from yesterday for more on that). When I query the database for the variable names I get something similar to 'DBquery' in my working example below, but considerably longer with over 2400 hundred variables. As I only need to remove two or three variables I would like to define a lookup table (like the list 'lookup' in my example) and subtract that from my database query. Now to my question. Is there a way I can subtract one list from another? Like setoff or alike? I would like to end up with a list like the one shown in my example called 'result'. In short, I would like to subtract 'lookup' from 'DBquery' and end up with 'result'; please note that 'result' is a list of vectors and not a list of data frames. In my real-life example DBquery is considerably longer, so defining that by hand would make for really, really long syntax. I hope someone knows some smart function that I can use to solve my problem in an elegant way. Thanks for reading. Erick ## begin R code ## DBquery <- list(tableA=data.frame(id = numeric(0), atwin = numeric(0), atrout = numeric(0)), tableB=data.frame(id = numeric(0), mq = numeric(0), z = numeric(0), m = numeric(0)), tableC=data.frame(V1 = numeric(0), mfn = numeric(0), iiff = numeric(0)), tableD=data.frame(id = numeric(0), msf = numeric(0), oom = numeric(0))) lookup <- list(tableA= c('atwin', 'atrout'), tableB= c('m', 'z'), tableC= 'ALL') ### ... 
result <- list(tableA= c('id'), tableB= c('id', 'mq'), tableC= c('V1', 'mfn', 'iiff')) __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] Is it possible to de-select with sqlQuery from the RODBC library?
Thank you Bart for your idea. The thing is that I have a large number of tables and I would like to avoid having to pull them at all. I currently have a list that I use as a lookup table in a loop with an if-else statement to sort between tables I want to sqlFetch (take everything) and tables where I sqlQuery (only want part of the table). The names of the list itself constitute a positive definition of which tables I want to pull. Here is a reduced illustrative example of what I am doing. My problem is still that I would like to make a negative selection so I get everything except 'V1010' and 'V1012' in table 3, and so forth (please see below). ## illustrative R example ## q.lookup <- list(Table3 = c('V1010', 'V1012'), Table7 = c('V1040', 'V1052'), Table9 = 'ALL') dfn <- list() for(i in names(q.lookup)) { if (q.lookup[[i]][1]=="ALL") { query <- names(q.lookup[1]) table.n <- sqlFetch(mdbConnect, query) } else if (q.lookup[[i]][1]!="ALL") { query <- paste("select", paste(q.lookup[[i]], collapse=", "), "from", names(q.lookup[i])) table.n <- sqlQuery(mdbConnect, query) } else print("your SQL call is gone haywire, fix it in line 193-204") dfn[[i]] <- table.n } ### end of illustrative R example I could use your solution, I think, but if at all possible I would prefer to figure out how to make a negative SQL statement (I still imagine that there is some reverse function of the SQL select statement somewhere out there). With high hopes. Eric On Wed, Mar 28, 2012 at 2:24 AM, Bart Joosen bartjoo...@hotmail.com wrote: What you can do: SELECT top 1 * FROM your_table; Use this selection to find all your column names in R then paste everything together without the names you don't want and then run your query. Bart -- View this message in context: http://r.789695.n4.nabble.com/Is-it-possible-to-de-select-with-sqlQuery-from-the-RODBC-library-tp4511189p4511800.html Sent from the R help mailing list archive at Nabble.com. 
__ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] Is it possible to de-select with sqlQuery from the RODBC library?
Dear R-list, I'm querying a M$ Access database with the sqlQuery function from the RODBC library. As I cannot make a working example with a database, here is an illustrative example: library(RODBC) mdbConnect <- odbcConnectAccess("S:/data/ ... /databse.mdb") data <- sqlQuery(mdbConnect, "select id, DOB, V1, V2, ..., V1009, V1011, V1013 from someTable") I want everything in the table (someTable) except 'V1010' and 'V1012', but I can't figure out how to make a negative or reverse SQL select statement. I have a lot of someTables and I have two or three variables in each table that I do not want R to fetch. Is there a way to define a reverse select in SQL? One would imagine it would look something like this: data <- sqlQuery(mdbConnect, "deselect V1010, V1012 from someTable") Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] Sankey Diagrams in R
Dear R-list, I am trying to visualize where the dropout happens in our patient flow. We are currently using traditional flowcharts and it bothers me that I can't visualize both the percentage and the flow in one diagram. The other day I came across some interesting diagrams doing exactly what I wanted; they had both flow and percentages visualized in one diagram. Here are some nice examples, apparently made with ‘sankeypython’: http://www.sankey-diagrams.com/tag/software/ It didn't take long to find a blog where an R user (thanks!) had posted an R script that actually produces a Sankey diagram in R: http://biologicalposteriors.blogspot.com/2010/07/sankey-diagrams-in-r.html See below for a working example. My questions are: is this the most up-to-date Sankey diagram script we have in the R community? Is there a better way to visualize flow and percentages in one diagram in R? Thanks, Eric ## the working example ## th, https://tonybreyal.wordpress.com/2011/11/24/source_https-sourcing-an-r-script-from-github/ sourc.https <- function(url, ...) 
{ # load package require(RCurl) # install.packages(c("RCurl"), dependencies = TRUE) # parse and evaluate each .R script sapply(c(url, ...), function(u) { eval(parse(text = getURL(u, followlocation = TRUE, cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))), envir = .GlobalEnv) }) } # Example from https://gist.github.com/1423501 sourc.https("https://raw.github.com/gist/1423501/55b3c6f11e4918cb6264492528b1ad01c429e581/Sankey.R") # My example (there is another example inside Sankey.R): inputs = c(6, 144) losses = c(6,47,14,7, 7, 35, 34) unit = "n =" labels = c("Transfers", "Referrals\n", "Unable to Engage", "Consultation only", "Did not complete the intake", "Did not engage in Treatment", "Discontinued Mid-Treatment", "Completed Treatment", "Active in \nTreatment") SankeyR(inputs,losses,unit,labels) # Clean up my mess rm(inputs, labels, losses, SankeyR, sourc.https, unit) __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] looping over string of frames when importing with 'sqlFetch' from a Microsoft Access database
Dear R-list, I am trying to import (all) frames from a Microsoft Access database as individual data frames in a fancy loop, but I'm having trouble figuring out how to use 'sqlFetch' from the RODBC package in a loop (mostly because I can't figure out how to loop over elements; I came from Stata). I would very much appreciate it if anyone on the list could help me solve this problem. As it is an issue of connecting to a database I can't really make a working example, so please bear with me. ### not-working R code ### ## first I establish a connection to my database mdbConnect <- odbcConnectAccess("C:\\... \\database.mdb") ## then I read off all the table names stringTables <- sqlTables(mdbConnect, tableType=c("TABLE"))$TABLE_NAME ## and then I meet the wall ... for(i.Frame in stringTables) { i.Frame <- sqlFetch(mdbConnect, i.Frame) } ## this broken loop creates one data frame called 'i.Frame' containing the last frame in 'stringTables'. I'm not doing this correctly. ## the final step. DF <- stringTables[[1]] for ( .df in stringTables) { DF <- merge(DF,.df, by.x="uniqueid", by.y="uniqueid", all=T) } ### end of not-working R code ### Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] looping over string of frames when importing with 'sqlFetch' from a Microsoft Access database
Problem solved thanks to Peter Langfelder's response to Adel ESSAFI. This is what should be in the loop, dfn = list(); for (i in length(stringTables) { dfn[[i]] <- sqlFetch(mdbConnect, stringTables[[i]]) } Thanks, Eric On Fri, Feb 24, 2012 at 3:19 PM, Eric Fail eric.f...@gmx.us wrote: Dear R-list, I am trying to import (all) frames from a Microsoft Access database as individual data frames in a fancy loop, but I'm having troubles figuring out how to use the 'sqlFetch' from the RODBS package in a loop (mostly because I can't figure out how to loop over elements (I came from stata) I would very much appreciate if anyone on the list could help me solve this problem, as it is an issue of connecting to a database I can't really make a working example, please bear with me. ### not-working R code ### ## first I establish a connection to my database mdbConnect-odbcConnectAccess(C:\\... \\database.mdb) ## then I read of all the table names stringTables - sqlTables(mdbConnect, tableType=c(TABLE))$TABLE_NAME ## and then I meet the wall ... for(i.Frame in stringTables) { i.Frame - sqlFetch(mdbConnect, i.Frame) } ## this broken loop creates one data frame called containing the 'i.Frame' containing the last frame in the 'stringTables.' I'm not doing this correct. ## the final step. DF - stringTables[[1]] for ( .df in stringTables) { DF -merge(DF,.df, by.x=uniqueid, by.y=uniqueid, all=T) } ### end of not-working R code ### Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. 
[[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] looping over string of frames when importing with 'sqlFetch' from a Microsoft Access database
correction, this is the working R script, forgot '1: ... )', dfn = list(); for (i in 1:length(stringTables)) { dfn[[i]] - sqlFetch(mdbConnect, stringTables[[i]]) } On Fri, Feb 24, 2012 at 4:48 PM, Eric Fail eric.f...@gmx.us wrote: Problem solved thanks to Peter Langfelder's response to Adel ESSAFI. This is what should be in the loop, dfn = list(); for (i in length(stringTables) { dfn[[ http://piratepad.net/ii http://piratepad.net/i]] - sqlFetch(mdbConnect, stringTables[[ http://piratepad.net/iihttp://piratepad.net/i ]]) } Thanks, Eric On Fri, Feb 24, 2012 at 3:19 PM, Eric Fail eric.f...@gmx.us wrote: Dear R-list, I am trying to import (all) frames from a Microsoft Access database as individual data frames in a fancy loop, but I'm having troubles figuring out how to use the 'sqlFetch' from the RODBS package in a loop (mostly because I can't figure out how to loop over elements (I came from stata) I would very much appreciate if anyone on the list could help me solve this problem, as it is an issue of connecting to a database I can't really make a working example, please bear with me. ### not-working R code ### ## first I establish a connection to my database mdbConnect-odbcConnectAccess(C:\\... \\database.mdb) ## then I read of all the table names stringTables - sqlTables(mdbConnect, tableType=c(TABLE))$TABLE_NAME ## and then I meet the wall ... for(i.Frame in stringTables) { i.Frame - sqlFetch(mdbConnect, i.Frame) } ## this broken loop creates one data frame called containing the 'i.Frame' containing the last frame in the 'stringTables.' I'm not doing this correct. ## the final step. DF - stringTables[[1]] for ( .df in stringTables) { DF -merge(DF,.df, by.x=uniqueid, by.y=uniqueid, all=T) } ### end of not-working R code ### Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. 
[[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] plotting and coloring longitudinal data with three time points (ggplot2)
Dear list, I have been struggling with this for some time now, and for the last hour I have been struggling to make a working example for the list. I hope someone out there have some experience with plotting longitudinal data that they will share. My data is some patient data with three different time stamps. First the patients are identified at different times (first time stamp). Second, they go through an assessment phase and begin their treatment (time stamp 2). Finally they are admitted from the hospital at some point (time stamp 3), I would like to make a spaghetti plot with the assessment phase in one color and the treatment phase in another color. I used ggplot2, and with this example data and only two time points; it works fine (I call it my working example), library(ggplot2) df - data.frame( date = seq(Sys.Date(), len=104, by=1 day)[sample(104, 52)], patient = factor(rep(1:26, 2), labels = LETTERS) ) df - df[order(df$date), ] dt - qplot(date, patient, data=df, geom=line) dt + scale_x_date() df[ which(df$patient=='E'), c(patient, date)] But, if I have three time points, R, for some reason I do not yet understand, add the two second time points in some funny way. Finally, when that is solved; how do I colorize the different parts of the line so the assessment phase gets one color and the treatment phase another? I want to be able to show how long we have been in contact with our patients, how much of the contact time that was assessment and how much that was actual treatment. 
Below is an example (I call it the not-working example) df2 - data.frame( date2 = seq(Sys.Date(), len= 156, by=2 day)[sample(156, 78)], patient2 = factor(rep(1:26, 3), labels = LETTERS) ) df2 - df2[order(df2$date2), ] dt2 - qplot(date2, patient2, data=df2, geom=line) dt2 + scale_x_date(major=months, minor=weeks) df2[ which(df2$patient2=='B'), c(patient2, date2)] If someone can point me in a direction or tell me what I am doing wrong or if there is some amazing package for plotting longitudinal data I would be very grateful. Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] plotting and coloring longitudinal data with three time points (ggplot2)
Thank you for solving my problem, it worked out beautifully. This was exactly what I was looking for, the ggplot2 package keeps impressing me. Thanks, Eric On Wed, Dec 7, 2011 at 6:01 AM, Hadley Wickham had...@rice.edu wrote: On Wed, Dec 7, 2011 at 4:02 AM, Eric Fail eric.f...@gmx.us wrote: Dear list, I have been struggling with this for some time now, and for the last hour I have been struggling to make a working example for the list. I hope someone out there have some experience with plotting longitudinal data that they will share. My data is some patient data with three different time stamps. First the patients are identified at different times (first time stamp). Second, they go through an assessment phase and begin their treatment (time stamp 2). Finally they are admitted from the hospital at some point (time stamp 3), I would like to make a spaghetti plot with the assessment phase in one color and the treatment phase in another color. I used ggplot2, and with this example data and only two time points; it works fine (I call it my working example), library(ggplot2) df - data.frame( date = seq(Sys.Date(), len=104, by=1 day)[sample(104, 52)], patient = factor(rep(1:26, 2), labels = LETTERS) ) df - df[order(df$date), ] dt - qplot(date, patient, data=df, geom=line) dt + scale_x_date() df[ which(df$patient=='E'), c(patient, date)] But, if I have three time points, R, for some reason I do not yet understand, add the two second time points in some funny way. Finally, when that is solved; how do I colorize the different parts of the line so the assessment phase gets one color and the treatment phase another? I want to be able to show how long we have been in contact with our patients, how much of the contact time that was assessment and how much that was actual treatment. 
Below is an example (I call it the not-working example) df2 - data.frame( date2 = seq(Sys.Date(), len= 156, by=2 day)[sample(156, 78)], patient2 = factor(rep(1:26, 3), labels = LETTERS) ) df2 - df2[order(df2$date2), ] dt2 - qplot(date2, patient2, data=df2, geom=line) dt2 + scale_x_date(major=months, minor=weeks) df2[ which(df2$patient2=='B'), c(patient2, date2)] Did you mean something like this? library(ggplot2) library(plyr) df2 - data.frame( date2 = seq(Sys.Date(), len= 156, by=2 day)[sample(156, 78)], patient2 = factor(rep(1:26, 3), labels = LETTERS) ) df2 - ddply(df2, patient2, mutate, visit = order(date2)) qplot(date2, patient2, data = df2, geom = line) + geom_point(aes(colour = factor(visit))) # or this? library(ggplot2) library(plyr) df2 - data.frame( date2 = seq(Sys.Date(), len= 156, by=2 day)[sample(156, 78)], patient2 = factor(rep(1:26, 3), labels = LETTERS) ) df2 - ddply(df2, patient2, mutate, visit = order(date2)) qplot(date2, patient2, data = df2, geom = line, colour = factor(visit), group = patient2) # Obviously the lines are drawn between the observations so you only see the first two visits. Hadley -- Assistant Professor / Dobelman Family Junior Chair Department of Statistics / Rice University http://had.co.nz/ __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] merge with origin information in new variable names
Is there anyone out there who can suggest a way to solve this problem? Thanks, Esben On Sun, Apr 24, 2011 at 8:53 PM, Jeff Newmiller jdnew...@dcn.davis.ca.us wrote: Merge only lets you combine two tables at a time, but it does have a suffix argument that is intended to address your concern, but only for variable names that would conflict. In your example, the id variables are all sequenced exactly the same, so you could actually use cbind rather than merge. However, whether you use merge or cbind, I think the most direct route to your desired result is to rename the data columns before you combine them, using the names function on the left hand side of an assignment with a vector of new names on the right. --- Jeff Newmiller The . . Go Live... DCN:jdnew...@dcn.davis.ca.us Basics: ##.#. ##.#. Live Go... Live: OO#.. Dead: OO#.. Playing Research Engineer (Solar/Batteries O.O#. #.O#. with /Software/Embedded Controllers) .OO#. .OO#. rocks...1k --- Sent from my phone. Please excuse my brevity. Eric Fail eric.f...@gmx.com wrote: Dear R-list, Here is my simple question, I have n data frames that I would like to merge, but I can't figure out how to add information about the origin of the variable(s). Here is my problem, DF.wave.1 - data.frame(id=1:10,var.A=sample(letters[1:4],10,TRUE)) DF.wave.2 - data.frame(id=1:10,var.M=sample(letters[5:8],10,TRUE)) DF.wave.3 - data.frame(id=1:10,var.A=sample(letters[5:8],10,TRUE)) Now; I would like to merge the three dataframes into one, but append a suffix to the individual variables names about thir origin. DF.wave.all - merge(DF.wave.1,DF.wave.2,DF.wave.3,by=id, [what to do here]) In other words, I would like it to loook like this. DF.wave.all id var.A.wave.1 var.M.wave.2 var.A.wave.3 1 1chj 2 2cej 3 3cgk 4 4cej 5 5cgi 6 6dek 7 7chk 8 8bgj 9 9bfi 10 10dhi Is there a command I can use directly in merge? 'suffixes' isn't really handy here. 
Thanks, Eric R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] merge with origin information in new variable names
Dear R-list, Here is my simple question: I have n data frames that I would like to merge, but I can't figure out how to add information about the origin of the variable(s). Here is my problem, DF.wave.1 <- data.frame(id=1:10,var.A=sample(letters[1:4],10,TRUE)) DF.wave.2 <- data.frame(id=1:10,var.M=sample(letters[5:8],10,TRUE)) DF.wave.3 <- data.frame(id=1:10,var.A=sample(letters[5:8],10,TRUE)) Now, I would like to merge the three data frames into one, but append a suffix to the individual variable names indicating their origin. DF.wave.all <- merge(DF.wave.1,DF.wave.2,DF.wave.3,by="id", [what to do here]) In other words, I would like it to look like this. DF.wave.all id var.A.wave.1 var.M.wave.2 var.A.wave.3 1 1 c h j 2 2 c e j 3 3 c g k 4 4 c e j 5 5 c g i 6 6 d e k 7 7 c h k 8 8 b g j 9 9 b f i 10 10 d h i Is there a command I can use directly in merge? 'suffixes' isn't really handy here. Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] Parsing question, partly comma separated partly underscore separated string
Thanks to Gabor Grothendieck and Dennis Murphy I can now solve first part of my problem and already impress my colleagues with the R-program below (I know it could be written in a smarter way, but I am learning). It reads my partly comma separated partly underscore separated string and cleans it up in a very need way. Regardless of my inability to write tight code I moved on to the second part of my quest, to put it all in to a loop to be able to loop over my approximately 100 .txt files in /usr2/username/data/ I got started with list.files() and my loop is more or less working, but I got stuck on the last cbind part. Is there a friendly R-hacker out there that would be willing to take a look at my loop below*2? Thanks, Eric ### #### ## The answer to the first part of my question ## #### ### Line - readLines(file(/usr2/efail/data/example.txt)) s - strsplit(Line, ZZ_)[[1]] s2 - sub(BLOCK.*, BLOCK, s) s3 - sub(@9z.svg, , s2) s4 - gsub(_, ,, s3) s5 - read.table(textConnection(s4[1]), sep = ,) DF - read.table(textConnection(s4), skip = 1, sep = ,, as.is = TRUE) DF$block - head(cumsum(c(, DF$V8) == BLOCK)+1, -1) DF$run - ave(DF$block, DF$block, FUN = seq_along) DF$V8 - NULL names(DF) - c(IngNam, Tx, Ty, Treatment, x, y, Y, BLOCK, RUN) DF$ID - s5$V1 DF # ## ## ## The PARTLY WORKING loop## ## ## # fname - list.files(/usr2/efail/data,pattern=.txt, full.names = TRUE, recursive =TRUE, ignore.case = TRUE) for (sp in 1:length(fname)) { Line - readLines(file(fname[sp])) s - strsplit(Line, ZZ_)[[1]] s2 - sub(BLOCK.*, BLOCK, s) s3 - sub(@9z.svg, , s2) s4 - gsub(_, ,, s3) s5 - read.table(textConnection(s4[1]), sep = ,) DF - read.table(textConnection(s4), skip = 1, sep = ,, as.is = TRUE) DF$block - head(cumsum(c(, DF$V8) == BLOCK)+1, -1) DF$run - ave(DF$block, DF$block, FUN = seq_along) DF$V8 - NULL names(DF) - c(IngNam, Tx, Ty, Treatment, x, y, Y, BLOCK, RUN) DF$ID - s5$V1 FINAL.DF - cbind(DF… ## This is where I got stuck. 
} On Mon, Mar 7, 2011 at 8:18 AM, Gabor Grothendieck ggrothendi...@gmail.com wrote: On Sun, Mar 6, 2011 at 10:13 PM, Eric Fail eric.f...@gmx.com wrote: Dear R-list, I have a partly comma separated partly underscore separated string that I am trying to parse into R. Furthermore I have a bunch of them, and they are quite long. I have now spent most of my Sunday trying to figure this out and thought I would try the list to see if someone here would be able to get me started. My data structure looks like this, (in a example.txt file) Subject ID,ExperimentName,2010-04-23,32:34:23,Version 0.4, 640 by 960 pixels, On Device M, M, 3.2.4,zz_373_462_488_...@9z.svg,592,820,3.35,zz_032_288_436_...@9z.svg,332,878,3.66,zz_384_204_433_...@9z.svg,334,824,3.28,zz_365_575_683_...@9z.svg,598,878,3.50,zz_005_480_239_...@9z.svg,630,856,8.03,zz_030_423_394_...@9z.svg,98,846,4.09,zz_033_596_398_...@9z.svg,636,902,3.28,zz_263_064_320_...@9z.svg,570,894,1.26,bl...@9z.svg,322,842,32.96,zz_004_088_403_...@9z.svg,606,908,3.32,zz_703_546_434_...@9z.svg,624,934,2.58,zz_712_348_543_...@9z.svg,20,828,5.36,zz_005_48_239_...@9z.svg,580,830,4.36,zz_310_444_623_...@9z.svg,586,806,0.08,zz_030_423_394_...@9z.svg,350,854,3.84,zz_340_382_539_...@9z.svg,570,894,1.26,bl...@9z.svg,542,840,4.44,zz_345_230_662_...@9z.svg,632,844,2.47,zz_006_335_309_...@9z.svg,96,930,3.63,zz_782_346_746_...@9z.svg,306,850,2.58,zz_334_200_333_...@9z.svg,304,842,3.34,zz_383_506_726_...@9z.svg,622,884,3.84,zz_294_360_448_...@9z.svg,90,858,3.56,zz_334_335_473_...@9z.svg,570,894,1.26,bl...@9z.svg,320,852,4.04, (end of example.txt file) The above is approximate 5% of the length of a full file, and then I got about 100 of them. Please note that the strings end with a comma. 
I am trying to parse it into something like this ID ImgNam BLOCK RUN Tx Ty Treatment x y Y Subject ID 373 1 1 462 488 TRT 592 820 3.35 Subject ID 32 1 2 288 436 CON 332 878 3.66 Subject ID 384 1 3 204 433 TRT 334 824 3.28 Subject ID 365 1 4 575 683 TRT 598 878 3.5 Subject ID 5 1 5 480 239 CON 630 856 8.03 Subject ID 30 1 6 423 394 CON 98 846 4.09 Subject ID 33 1 7 596 398 CON 636 902 3.28 Subject ID 263 1 8 64 320 TRT 570 894 1.26 Subject ID 4 2 1 88 403 CON 606 908 3.32 Subject ID 703 2 2 546 434 CON 624 934 2.58 Subject ID 712 2 3 348 543 CON 20 828 5.36 Subject ID 5 2 4 48 239 CON 580 830 4.36 Subject ID 310 2 5 444 623 TRT 586 806 0.08 Subject ID 30 2 6 423 394 CON 350 854 3.84 Subject ID 340 2 7 382 539 TRT 570 894 1.26 Subject ID 345 3 1 230 662 TRT 632 844 2.47 Subject ID 6 3 2 335 309 CON 96 930 3.63 Subject ID 782 3 3 346
[R] Parsing question, partly comma separated partly underscore separated string
Dear R-list, I have a partly comma separated partly underscore separated string that I am trying to parse into R. Furthermore I have a bunch of them, and they are quite long. I have now spent most of my Sunday trying to figure this out and thought I would try the list to see if someone here would be able to get me started. My data structure looks like this, (in a example.txt file) Subject ID,ExperimentName,2010-04-23,32:34:23,Version 0.4, 640 by 960 pixels, On Device M, M, 3.2.4,zz_373_462_488_...@9z.svg,592,820,3.35,zz_032_288_436_...@9z.svg,332,878,3.66,zz_384_204_433_...@9z.svg,334,824,3.28,zz_365_575_683_...@9z.svg,598,878,3.50,zz_005_480_239_...@9z.svg,630,856,8.03,zz_030_423_394_...@9z.svg,98,846,4.09,zz_033_596_398_...@9z.svg,636,902,3.28,zz_263_064_320_...@9z.svg,570,894,1.26,bl...@9z.svg,322,842,32.96,zz_004_088_403_...@9z.svg,606,908,3.32,zz_703_546_434_...@9z.svg,624,934,2.58,zz_712_348_543_...@9z.svg,20,828,5.36,zz_005_48_239_...@9z.svg,580,830,4.36,zz_310_444_623_...@9z.svg,586,806,0.08,zz_030_423_394_...@9z.svg,350,854,3.84,zz_340_382_539_...@9z.svg,570,894,1.26,bl...@9z.svg,542,840,4.44,zz_345_230_662_...@9z.svg,632,844,2.47,zz_006_335_309_...@9z.svg,96,930,3.63,zz_782_346_746_...@9z.svg,306,850,2.58,zz_334_200_333_...@9z.svg,304,842,3.34,zz_383_506_726_...@9z.svg,622,884,3.84,zz_294_360_448_...@9z.svg,90,858,3.56,zz_334_335_473_...@9z.svg,570,894,1.26,bl...@9z.svg,320,852,4.04, (end of example.txt file) The above is approximate 5% of the length of a full file, and then I got about 100 of them. Please note that the strings end with a comma. 
I am trying to parse it into something like this ID ImgNam BLOCK RUN Tx Ty Treatment x y Y Subject ID 373 1 1 462 488 TRT 592 820 3.35 Subject ID 32 1 2 288 436 CON 332 878 3.66 Subject ID 384 1 3 204 433 TRT 334 824 3.28 Subject ID 365 1 4 575 683 TRT 598 878 3.5 Subject ID 5 1 5 480 239 CON 630 856 8.03 Subject ID 30 1 6 423 394 CON 98 846 4.09 Subject ID 33 1 7 596 398 CON 636 902 3.28 Subject ID 263 1 8 64 320 TRT 570 894 1.26 Subject ID 4 2 1 88 403 CON 606 908 3.32 Subject ID 703 2 2 546 434 CON 624 934 2.58 Subject ID 712 2 3 348 543 CON 20 828 5.36 Subject ID 5 2 4 48 239 CON 580 830 4.36 Subject ID 310 2 5 444 623 TRT 586 806 0.08 Subject ID 30 2 6 423 394 CON 350 854 3.84 Subject ID 340 2 7 382 539 TRT 570 894 1.26 Subject ID 345 3 1 230 662 TRT 632 844 2.47 Subject ID 6 3 2 335 309 CON 96 930 3.63 Subject ID 782 3 3 346 746 TRT 306 850 2.58 Subject ID 334 3 4 200 333 TRT 304 842 3.34 Subject ID 383 3 5 506 726 TRT 622 884 3.84 Subject ID 294 3 6 360 448 TRT 90 858 3.56 Subject ID 334 3 7 335 473 TRT 570 894 1.26 I could do it in Excel, but it would take me a week--and it would be stupid--if someone could please help me get started I would very much appreciate it. It would not only benefit me, but my colleagues would see the benefit of R and the R-list in particular. Thanks in advance! Eric -- __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] ggplot2, 'se' variable in geom_errorbar's limits?
Can't anybody give me a hint on how to solve this? I even bought the ggplot2-book, so you could also give a page (or a series of pages). Thanks, Eric On Thu, Feb 17, 2011 at 10:19 AM, Eric Fail eric.f...@gmx.com wrote: Dear R-list I'm working with with geom_errorbar; specifically I'm trying to reproduce the example Hadley Wickham have on http://had.co.nz/ggplot2/geom_errorbar.html (all in the button of the page) where he makes an nice plot with errorbars and then draw lines between the points. What confuses me is the 'limits' he defines for the errorbars from the se variable. First he creates a dataset, df - data.frame( trt = factor(c(1, 1, 2, 2)), resp = c(1, 5, 3, 4), group = factor(c(1, 2, 1, 2)), se = c(0.1, 0.3, 0.3, 0.2) ) # library(ggplot2) and then he creates some limits from the se variables. limits - aes(ymax = resp + se, ymin=resp - se) [elements omitted] # and then he creates the plot (I'm interested in). p - ggplot(df, aes(colour=group, y=resp, x=trt)) p + geom_line(aes(group=group)) + geom_errorbar(limits, width=0.2) I can (of course) get Hadley's example to run, but I can't do it on my data as I don't have a 'se' variable/don't know how to create it. I have a group variable, a treatment variable, and a response variable, but no se variable. Could anyone out there explain how I create a 'se' variable in my data? I'm sure my reasoning is the one that is off, and not ggplot2 (I'm a big fan). Your help is appreciated! Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] ggplot2, 'se' variable in geom_errorbar's limits?
Hi Scott, Thank you for taking the time to look at my problem! I played around with your example and realized that in solving the problem with limits by summarizing the data I loose the option to split the data along some third variable, say the 'color' variable in the diamonds data. Any idea on how I can solve the problem directly in ggplot2? Any ggplot2-expects out there? Sincerely, Eric On Sat, Feb 19, 2011 at 4:51 PM, Scott Chamberlain myrmecocys...@gmail.com wrote: require(ggplot2) data(diamonds) diamonds - diamonds[1:100,c(2,7)] # use ddply in plyr package (loaded with ggplot2) to get data to plot diamonds_df - ddply(diamonds, .(cut), summarise, mean_price = mean(price), se_price = sd(price)/sqrt(length(price)) ) limits - aes(ymax = mean_price + se_price, ymin = mean_price - se_price) ggplot(diamonds_df, aes(x = cut, y = mean_price)) + geom_point() + geom_errorbar(limits, width=0.2) Sincerely, Scott Chamberlain Rice University, EEB Dept. On Saturday, February 19, 2011 at 3:12 PM, Eric Fail wrote: Can't anybody give me a hint on how to solve this? I even bought the ggplot2-book, so you could also give a page (or a series of pages). Thanks, Eric On Thu, Feb 17, 2011 at 10:19 AM, Eric Fail eric.f...@gmx.com wrote: Dear R-list I'm working with with geom_errorbar; specifically I'm trying to reproduce the example Hadley Wickham have on http://had.co.nz/ggplot2/geom_errorbar.html (all in the button of the page) where he makes an nice plot with errorbars and then draw lines between the points. What confuses me is the 'limits' he defines for the errorbars from the se variable. First he creates a dataset, df - data.frame( trt = factor(c(1, 1, 2, 2)), resp = c(1, 5, 3, 4), group = factor(c(1, 2, 1, 2)), se = c(0.1, 0.3, 0.3, 0.2) ) # library(ggplot2) and then he creates some limits from the se variables. limits - aes(ymax = resp + se, ymin=resp - se) [elements omitted] # and then he creates the plot (I'm interested in). 
p - ggplot(df, aes(colour=group, y=resp, x=trt)) p + geom_line(aes(group=group)) + geom_errorbar(limits, width=0.2) I can (of course) get Hadley's example to run, but I can't do it on my data as I don't have a 'se' variable/don't know how to create it. I have a group variable, a treatment variable, and a response variable, but no se variable. Could anyone out there explain how I create a 'se' variable in my data? I'm sure my reasoning is the one that is off, and not ggplot2 (I'm a big fan). Your help is appreciated! Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] ggplot2, 'se' variable in geom_errorbar's limits?
Thank you Scott and Ista, I really appreciate your help! I solved it with Scott's help on the ddply. For the record, here is the working example that solves my initial question: ## install.packages(c(ggplot2, plyr)) require(ggplot2) data(diamonds) diamonds - diamonds[1:100,c(2,7)] # use ddply in plyr package (loaded with ggplot2) to get data to plot diamonds_df - ddply(diamonds, .(cut, color), summarise, mean_price = mean(price), se_price = sd(price)/sqrt(length(price)) ) limits - aes(ymax = mean_price + se_price, ymin = mean_price - se_price) ggplot(diamonds_df, aes(colour= color, x = cut, y = mean_price)) + geom_point() + geom_line(aes(group= color)) + geom_errorbar(limits, width=0.2) Very grateful! Eric On Sat, Feb 19, 2011 at 6:17 PM, Scott Chamberlain myrmecocys...@gmail.com wrote: Hi Eric, I would just include that third variable in the ddply call, for example: ddply(diamonds, .(cut, clarity, etc...), summarise, mean = mean(price, se = ... ) where you can summarise by multiple variables within the .(x, y, etc.)\\ I think that answers your question. Let me know if not. The example I sent earlier was just for simplicity. Scott On Saturday, February 19, 2011 at 4:58 PM, Eric Fail wrote: Hi Scott, Thank you for taking the time to look at my problem! I played around with your example and realized that in solving the problem with limits by summarizing the data I loose the option to split the data along some third variable, say the 'color' variable in the diamonds data. Any idea on how I can solve the problem directly in ggplot2? Any ggplot2-expects out there? 
Sincerely, Eric On Sat, Feb 19, 2011 at 4:51 PM, Scott Chamberlain myrmecocys...@gmail.com wrote: require(ggplot2) data(diamonds) diamonds - diamonds[1:100,c(2,7)] # use ddply in plyr package (loaded with ggplot2) to get data to plot diamonds_df - ddply(diamonds, .(cut), summarise, mean_price = mean(price), se_price = sd(price)/sqrt(length(price)) ) limits - aes(ymax = mean_price + se_price, ymin = mean_price - se_price) ggplot(diamonds_df, aes(x = cut, y = mean_price)) + geom_point() + geom_errorbar(limits, width=0.2) Sincerely, Scott Chamberlain Rice University, EEB Dept. On Saturday, February 19, 2011 at 3:12 PM, Eric Fail wrote: Can't anybody give me a hint on how to solve this? I even bought the ggplot2-book, so you could also give a page (or a series of pages). Thanks, Eric On Thu, Feb 17, 2011 at 10:19 AM, Eric Fail eric.f...@gmx.com wrote: Dear R-list I'm working with with geom_errorbar; specifically I'm trying to reproduce the example Hadley Wickham have on http://had.co.nz/ggplot2/geom_errorbar.html (all in the button of the page) where he makes an nice plot with errorbars and then draw lines between the points. What confuses me is the 'limits' he defines for the errorbars from the se variable. First he creates a dataset, df - data.frame( trt = factor(c(1, 1, 2, 2)), resp = c(1, 5, 3, 4), group = factor(c(1, 2, 1, 2)), se = c(0.1, 0.3, 0.3, 0.2) ) # library(ggplot2) and then he creates some limits from the se variables. limits - aes(ymax = resp + se, ymin=resp - se) [elements omitted] # and then he creates the plot (I'm interested in). p - ggplot(df, aes(colour=group, y=resp, x=trt)) p + geom_line(aes(group=group)) + geom_errorbar(limits, width=0.2) I can (of course) get Hadley's example to run, but I can't do it on my data as I don't have a 'se' variable/don't know how to create it. I have a group variable, a treatment variable, and a response variable, but no se variable. Could anyone out there explain how I create a 'se' variable in my data? 
I'm sure my reasoning is the one that is off, and not ggplot2 (I'm a big fan). Your help is appreciated! Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] ggplot2, 'se' variable in geom_errorbar's limits?
Dear R-list, I'm working with geom_errorbar; specifically I'm trying to reproduce the example Hadley Wickham has on http://had.co.nz/ggplot2/geom_errorbar.html (at the very bottom of the page) where he makes a nice plot with errorbars and then draws lines between the points. What confuses me is the 'limits' he defines for the errorbars from the se variable. First he creates a dataset, df <- data.frame( trt = factor(c(1, 1, 2, 2)), resp = c(1, 5, 3, 4), group = factor(c(1, 2, 1, 2)), se = c(0.1, 0.3, 0.3, 0.2) ) # library(ggplot2) and then he creates some limits from the se variable. limits <- aes(ymax = resp + se, ymin=resp - se) [elements omitted] # and then he creates the plot (I'm interested in). p <- ggplot(df, aes(colour=group, y=resp, x=trt)) p + geom_line(aes(group=group)) + geom_errorbar(limits, width=0.2) I can (of course) get Hadley's example to run, but I can't do it on my data as I don't have a 'se' variable/don't know how to create it. I have a group variable, a treatment variable, and a response variable, but no se variable. Could anyone out there explain how I create a 'se' variable in my data? I'm sure my reasoning is the one that is off, and not ggplot2 (I'm a big fan). Your help is appreciated! Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] wildcard operator
Hi R users, As usual, I'm trying to replicate some SAS code. I would like to know if there is a wildcard operator in R, like : in SAS. When running: lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + ... + x9860, data=mydata) I would like to be able to get around it by just writing something like this: lm(y ~ x1:x9860, data=mydata) Anyone? Sorry for not including a working example, but I figured that it wasn't necessary. Thanks, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] big panel: filehash, bigmemory or other
Dear R-list, I'm about to start a new project on a rather big panel, consisting of approximately 8 million observations in 30 waves of data and about 15 variables. I have a similar data set that is approximately 7 gigabytes in size. Until now I have done my data management in SAS and Stata, mostly identifying spells, counting events in intervals, and the like, but I would like to do the data management - and fit my models - in R. However, R can't handle the data in the normal R way; it's simply too big. So I thought of trying either filehash, bigmemory or some other similar package I haven't heard of (yet). In the documentation for 'bigmemory' it says that the package is capable of "basic manipulation" on "manageable subsets of the data", but what does that actually mean? Since learning this in R is a rather time-consuming process, and I know SAS is capable of doing the data management and has the proc mixed module, I wanted to ask on the list before I set out on this odyssey. Does anyone out there have any practical experience with data sets (panels) of that size, and maybe some experience fitting a model, presumably using lmer or the like, with filehash or bigmemory, that they would be willing to share? Thanks in advance, Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] ggplot2: ... seem to be overwriting each other
Dear list, A week ago Dennis Murphy helped me out by showing me some nice ggplot2 tricks. Now I am stuck on a new problem that I can't solve (I have ordered the ggplot2 book). My problem is that I can't add my spline (or geom_smooth) and at the same time control the grid (using scale_x_continuous); they seem to overwrite each other. I have continued the working example from my last question (http://n4.nabble.com/add-spline-to-longitudinal-data-preferably-similar-to-SAS-s-I-SM50S-routine-td1017138.html ) example start tolerance.pp <- read.table("http://www.ats.ucla.edu/stat/R/examples/alda/tolerance1_pp.txt", sep=",", header=T) # install.packages("ggplot2", dep = T) library(ggplot2) plot <- ggplot(tolerance.pp, aes(age, tolerance, group = id)) + geom_line() plot + geom_smooth(aes(group = male, colour = male), size = 1, se = FALSE) plot + scale_x_continuous(breaks = c(10, 12, 13, 15)) # plot + scale_x_continuous(limits = c(9, 16)) example end I have added the 'plot + scale_x_continuous(limits = c(9, 16))' line since this seems to conflict as well. Thanks in advance! Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] ggplot2: ... seem to be overwriting each other
Hi Dennis (cc Lucien Lemmens and the r-help) Thank you for your help. I got help from Mr. Lucien Lemmens as well, and thanks to him to (again). The thing is, if, my working example, is run line by line, it overwrites ... or one thing overwrites the next. However both you and Lucien Lemmens solved my problem. Lucien Lemmens solution. plot - ggplot(tolerance.pp, aes(age, tolerance, group= id)) + geom_line() plot - plot+geom_smooth(aes(group=male,colour=male),size=1,se=FALSE) plot + scale_x_continuous(breaks = c(10, 12, 13, 15)) I know it seems as there is no purpose, but I have a huge dataset (here) where the measures are at 6, 12, 20 and 24 (i think), which was the reason for my question. Maybe I should have written that in the mail. Anyhow, here is two solutions on how to make the grid in a ggplot2 unequally spaced, if anyone ever should run it to that problem agin. Thanks for all your help. Eric On 31/01/2010, at 17.51, Dennis Murphy wrote: Hi: I don't quite see what the problem is, but I can show you a couple of things that might be useful... (1) you want male to be a factor, but in the data set, it takes integer values; therefore, you need to redefine it as a factor: str(tolerance.pp) 'data.frame': 80 obs. of 6 variables: $ id : int 9 9 9 9 9 45 45 45 45 45 ... $ age : int 11 12 13 14 15 11 12 13 14 15 ... $ tolerance: num 2.23 1.79 1.9 2.12 2.66 1.12 1.45 1.45 1.45 1.99 ... $ male : int 0 0 0 0 0 1 1 1 1 1 ... $ exposure : num 1.54 1.54 1.54 1.54 1.54 1.16 1.16 1.16 1.16 1.16 ... $ time : int 0 1 2 3 4 0 1 2 3 4 ... tolerance.pp$male - factor(tolerance.pp$male, labels = c('F', 'M')) str(tolerance.pp) 'data.frame': 80 obs. of 6 variables: $ id : int 9 9 9 9 9 45 45 45 45 45 ... $ age : int 11 12 13 14 15 11 12 13 14 15 ... $ tolerance: num 2.23 1.79 1.9 2.12 2.66 1.12 1.45 1.45 1.45 1.99 ... $ male : Factor w/ 2 levels F,M: 1 1 1 1 1 2 2 2 2 2 ... $ exposure : num 1.54 1.54 1.54 1.54 1.54 1.16 1.16 1.16 1.16 1.16 ... $ time : int 0 1 2 3 4 0 1 2 3 4 ... 
This has an impact on the legend, as you'll see below. (2) I set scale_x_continuous(breaks = NA) in the initial setup and redefined it later on. There's probably a more efficient way, but it works. (3) I thought that xlim(9, 16) would extend the limits of the x-axis, but it had no effect on the plot. What you'll see below are x-ticks at 12, 13 and 15, although I don't see the purpose of it other than to see that it can be done... plot <- ggplot(tolerance.pp, aes(age, tolerance, group = id)) + xlim(9, 16) + scale_x_continuous(breaks = NA) plot + geom_line() + geom_smooth(aes(group = male, colour = male), size = 1.2, se = FALSE) + scale_x_continuous(breaks = c(10, 12, 13, 15)) + scale_colour_hue("gender") The last line changes the legend title to something a little more evocative. HTH, Dennis On Sun, Jan 31, 2010 at 1:28 PM, Eric Fail e...@it.dk wrote: Dear list A week ago Dennis Murphy helped me out by showing me some nice ggplot2 tricks. Now I got stuck in a new problem that I can't solve (I have ordered the ggplot2-book). My problem is that I can't add my spline (or geom_smooth) and at the same time control the grid (using scale_x_continuous), they seem to overwrite each other. I have continued the working example from my last question (http://n4.nabble.com/add-spline-to-longitudinal-data-preferably-similar-to-SAS-s-I-SM50S-routine-td1017138.html ) example start tolerance.pp <- read.table("http://www.ats.ucla.edu/stat/R/examples/alda/tolerance1_pp.txt", sep=",", header=T) # install.packages("ggplot2", dep = T) library(ggplot2) plot <- ggplot(tolerance.pp, aes(age, tolerance, group = id)) + geom_line() plot + geom_smooth(aes(group = male, colour = male), size = 1, se = FALSE) plot + scale_x_continuous(breaks = c(10, 12, 13, 15)) # plot + scale_x_continuous(limits = c(9, 16)) example end I have added the 'plot + scale_x_continuous(limits = c(9, 16)) ' since this seems to conflict as well. Thanks in advance! 
Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
[R] add spline to longitudinal data - preferably similar to SAS's 'I=SM50S' routine
Hi R users, I'm trying to replicate some SAS code. I have to add a spline to my longitudinal spaghetti plot. I have the plot, but I can't add the spline, an overall trend line. In the SAS code they use the command 'I=SM50S' and I would prefer something similar. I’m using R 2.10.1 on Windows XP… I have made this working example. tolerance.pp <- read.table("http://www.ats.ucla.edu/stat/R/examples/alda/tolerance1_pp.txt", sep=",", header=T) # install.packages("lattice", dep = T) library(lattice) xyplot(tolerance ~ age, groups = id, data=tolerance.pp, type = "l") This is where I want to add an overall spline. Hope someone out there can figure this out. Thanks Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
Re: [R] add spline to longitudinal data - preferably similar to SAS's 'I=SM50S' routine
Hi Dennis Works like a charm. It's some of the best help I have ever received. Very thankful! Eric On 18/01/2010, at 19.12, Dennis Murphy wrote: Hi: There is a very similar example in the ggplot book by Hadley Wickham (section 4.5, pp. 50-52). Here's one approach using ggplot: library(ggplot2) p <- ggplot(tolerance.pp, aes(age, tolerance, group = id)) + geom_line() p + geom_smooth(aes(group = 1), size = 2) The second command adds a smoothing spline in blue, with twice the line width as the individual spaghetti plots, and by default, a confidence envelope around it. To get rid of the envelope, include se = FALSE as an argument to geom_smooth(); to change the color, add the argument colour = 'red', for example. HTH, Dennis On Mon, Jan 18, 2010 at 3:25 PM, Eric Fail e...@it.dk wrote: Hi R users, I'm trying to replicate some SAS code. I have to add a spline to my longitudinal spaghetti plot. I have the plot, but I can't add the spline, an overall trend line. In the SAS code they use the command 'I=SM50S' and I would prefer something similar. I'm using R 2.10.1 on Windows XP. I have made this working example. tolerance.pp <- read.table("http://www.ats.ucla.edu/stat/R/examples/alda/tolerance1_pp.txt", sep=",", header=T) # install.packages("lattice", dep = T) library(lattice) xyplot(tolerance ~ age, groups = id, data=tolerance.pp, type = "l") This is where I want to add an overall spline. Hope someone out there can figure this out. Thanks Eric __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. [[alternative HTML version deleted]] __ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.