[R] stop on rows where !is.na(mydata$ti_all)

2012-09-24 Thread Eric Fail
Dear R experts,

I got help to build a loop but there is a bug inside it that causes
one part of the mechanism to fail.

It should grow once, but it keeps growing on rows where $ti_all is not NA.

Here is a wall of code that very crudely demonstrates the problem.
There are a couple of dim() outputs at the end where you can see how,
the second time around, it keeps adding (2) rows, but this does not happen
to row 2. I'm aware this is part of another function, but I can't
figure it out for that. The first thing that happens is correct: 7
rows are added.

Any help or guidance would be appreciated.

Thanks,
Eric

lookup - structure(list(c_name = c(1L, 2L, 4L, 5L, 6L, 7L), t_name =
structure(1:6, .Label = c(Bob, Julian, Mitt, Ricky, Tom,
Victor), class = factor)), .Names = c(c_name, t_name), class =
data.frame, row.names = c(1, 2, 3, 4, 5, 6))

mydata - structure(list(id = c(1L, 1L, 2L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 6L, 7L, 7L, 7L, 7L, 8L, 9L), time =
c(intake_arm_1, v_001_arm_1, intake_arm_1, intake_arm_1,
intake_arm_1, v_001_arm_1, v_002_arm_1, v_003_arm_1,
v_004_arm_1, v_005_arm_1, v_006_arm_1, v_007_arm_1,
intake_arm_1, v_001_arm_1, intake_arm_1, intake_arm_1,
v_011_arm_1, v_012_arm_1, v_013_arm_1, intake_arm_1,
intake_arm_1), dat_all = c(NA, NA, NA, NA, NA, NA, NA, 2012-09-23,
2012-09-23, 2012-09-02, 2012-09-10, 2012-09-23, NA, NA, NA,
NA, 2012-09-23, 2012-09-23, 2012-09-23, NA, NA), ti_all = c(NA,
NA, NA, NA, NA, NA, NA, 6L, 44L, 33L, NA, 22L, NA, NA, NA, NA, 65L,
NA, 10L, NA, NA), ty_all = c(NA, NA, NA, NA, NA, NA, NA, out_,
out_, cma_, NA, cma_, NA, NA, NA, NA, out_, out_,
out_, NA, NA), out_c = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), cma_c = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), c_n = c(NA, 1L, NA, NA, NA, NA, NA, 7L, 4L, 7L, NA, 1L,
NA, 2L, NA, NA, 7L, 7L, 7L, NA, NA), t_name = c(Tom, NA,
Ricky, Ricky, Victor, NA, NA, NA, NA, NA, NA, NA, Julian,
NA, Julian, Bob, NA, NA, NA, Mitt, Mitt)), .Names = c(id,
time, dat_all, ti_all, ty_all, out_c, cma_c, c_n,
t_name), class = data.frame, row.names = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21))


 if(require(plyr)){
  print(plyr is loaded correctly)
 } else {
  print(trying to install plyr)
  install.packages('plyr')
  if(require(plyr)){
print(plyr installed and loaded)
  } else {
   stop(could not install plyr)
  }
}

newrows - ddply(mydata, .(id), function(subdata) {
subdata_ty = subdata[!is.na(subdata$ty_all), ]
if (NROW(subdata) == 1) {
r = subdata[1, ]
c(v_001_arm_1, NA, NA, NA, NA, NA,
lookup$c_name[lookup$t_name == r$t_name], NA)
}
else if (NROW(subdata_ty)  0) {
numbers = sapply(strsplit(subdata$time, _), function(l)
ifelse(l[1] != intake, as.numeric(l[2]), 0))
newname = paste(c(v, sprintf(%03d, max(numbers) + 1), arm, 1),
collapse=_)
r1 = subdata[1, ]
new_c_n = lookup$c_name[lookup$t_name == r1$t_name]

new_out_c = sum(subdata$ty_all == out_  !is.na(subdata$ti_all))
new_cma_c = sum(subdata$ty_all == cma_  !is.na(subdata$ti_all))


new_out_c = ifelse(new_out_c == 0, NA, new_out_c)
new_cma_c = ifelse(new_cma_c == 0, NA, new_cma_c)

return(c(newname, NA, NA, NA, new_out_c, new_cma_c, new_c_n, NA))
}
})

# recombine and sort
colnames(newrows) = colnames(mydata)
newdata = rbind(mydata, newrows)
newdata = newdata[order(newdata$id), ]

mydata2 - newdata

newrows2 - ddply(mydata2, .(id), function(subdata) {
subdata_ty = subdata[!is.na(subdata$ty_all), ]
if (NROW(subdata) == 1) {
r = subdata[1, ]
c(v_001_arm_1, NA, NA, NA, NA, NA,
lookup$c_name[lookup$t_name == r$t_name], NA)
}
else if (NROW(subdata_ty)  0) {
numbers = sapply(strsplit(subdata$time, _), function(l)
ifelse(l[1] != intake, as.numeric(l[2]), 0))
newname = paste(c(v, sprintf(%03d, max(numbers) + 1), arm, 1),
collapse=_)
r1 = subdata[1, ]
new_c_n = lookup$c_name[lookup$t_name == r1$t_name]

new_out_c = sum(subdata$ty_all == out_  !is.na(subdata$ti_all))
new_cma_c = sum(subdata$ty_all == cma_  !is.na(subdata$ti_all))


new_out_c = ifelse(new_out_c == 0, NA, new_out_c)
new_cma_c = ifelse(new_cma_c == 0, NA, new_cma_c)

return(c(newname, NA, NA, NA, new_out_c, new_cma_c, new_c_n, NA))
}
})

# recombine and sort
colnames(newrows2) = colnames(mydata2)
newdata2 = rbind(mydata2, newrows2)
newdata2 = newdata2[order(newdata2$id), ]


mydata3 - newdata2

newrows2 - ddply(mydata3, .(id), function(subdata) {
subdata_ty = subdata[!is.na(subdata$ty_all), ]
if (NROW(subdata) == 1) {
r = subdata[1, ]
c(v_001_arm_1, NA, NA, NA, NA, NA,
lookup$c_name[lookup$t_name == 

Re: [R] Parsing back to API strcuture

2012-09-17 Thread Eric Fail
Problem solved by Josh O'Brien on stackoverflow,
http://stackoverflow.com/questions/12393004/parsing-back-to-messy-api-strcuture/12435389#12435389

some_magic <- function(df) {
    ## Replace NA with "", converting column types as needed
    df[] <- lapply(df, function(X) {
        if(any(is.na(X))) {X[is.na(X)] <- ""; X} else {X}
    })

    ## Print integers in first column as 2-digit character strings
    ## (DO NOTE: Hardwiring the number of printed digits here is probably
    ## inadvisable, though needed to _exactly_ reconstitute RAW.API.)
    df[[1]] <- sprintf("%02.0f", df[[1]])

    ## Separately build header and table body, then suture them together
    l1 <- paste(names(df), collapse=",")
    l2 <- capture.output(write.table(df, sep=",", col.names=FALSE,
                                     row.names=FALSE))
    out <- paste0(c(l1, l2, ""), collapse="\n")

    ## Reattach attributes
    att <- list(`Content-Type` = structure(c("text/html", "utf-8"),
                .Names = c("", "charset")))
    attributes(out) <- att
    out
}

identical(some_magic(df), RAW.API)
# [1] TRUE


On Thu, Sep 13, 2012 at 11:32 AM, Eric Fail eric.f...@gmx.us wrote:
 Dear Jim,

 Thank you for your response I appreciate your effort!

 It is close, I must admit that. What I am looking for is an object
 that is identical to 'RAW.API,' or at least in the stricture (I guess
 i do not need the ,`Content-Type` = structure(c(text/html,
 utf-8), .Names = c(,
 charset))) part.

 When I investigate 'x.out' it also have the NA's. I've tried to fix
 it, but I had to give up. It is strange because getting there seems so
 easy (warning false logic!).

 Here is what I got on my looong and alternative route in the hope that
 someone on the list might be able to help

 RAW.API - 
 structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n,
 `Content-Type` = structure(c(text/html, utf-8), .Names =
 c(,charset)))

 # I used an alternative way of converting it to a dataset to keep the
 leading 0 in the id variables
 x - read.table(file = textConnection(RAW.API ), header = TRUE, sep =
 ,, na.strings = , stringsAsFactors = FALSE, colClasses =character)
 x

  # now put it back into the same string; write.csv does quote alphanumerics
 write.csv(x, textConnection('output', 'w'), row.names = FALSE)
 unlockBinding(output, env = .GlobalEnv)
 # fixes the problem with the header
 output[1] - gsub(\\\, , output[1])
 # removes NAs
 output - gsub(NA, \\, output)
 # removes \ at the beginning of each line
 output - gsub(^\\\, , output)
 # removes an  at the end of each line
 output - gsub(\\\$, , output)
 # same as before
 x.out - paste(output, collapse = '\n\')
 # adds an line break at the end
 x.out - gsub($, \n, x.out)

 # so much manual gsub ...

 Any help would be very much appreciated.

 On Wed, Sep 12, 2012 at 5:54 PM, jim holtman jholt...@gmail.com wrote:
 This is close, but it does quote the header names, but does produce
 the same dataframe when read back in:

 RAW.API - 
 structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n,
  `Content-Type` = structure(c(text/html, utf-8), .Names = c(, 
 charset)))
 x - read.csv(textConnection(RAW.API), as.is = TRUE)
 x
   id event_arm namedob pushed_text pushed_calc complete
 1  1 event_1_arm_1 John 1979-05-01  NA2
 2  1 event_2_arm_1 John 2012-09-02 abc 1231
 3  1 event_3_arm_1 John 2012-09-10  NA2
 4  2 event_1_arm_1 Mary 1951-09-10 def 4562
 5  2 event_2_arm_1 Mary 1978-09-12  NA2

 # now put it back into the same string; write.csv does quote alphanumerics
 write.csv(x, textConnection('output', 'w'), row.names = FALSE)
 x.out - paste(output, collapse = '\n')
 # read it back in to show it is the same
 x.in - read.csv(textConnection(x.out), as.is = TRUE)
 x.in
   id event_arm namedob pushed_text pushed_calc complete
 1  1 event_1_arm_1 John 1979-05-01  NA2
 2  1 event_2_arm_1 John 2012-09-02 abc 1231
 3  1 event_3_arm_1 John 2012-09-10  NA2
 4  2 event_1_arm_1 Mary 1951-09-10 def 4562
 5  2 event_2_arm_1 Mary 1978-09-12  NA2



 On Wed, Sep 12, 2012 at 8:21 PM, Eric Fail eric.f...@gmx.us wrote:
 Dear R experts,

 I'm reading data from an online database via API and it gets delivered in 
 this messy

Re: [R] Parsing back to API strcuture

2012-09-13 Thread Eric Fail
Dear Jim,

Thank you for your response I appreciate your effort!

It is close, I must admit that. What I am looking for is an object
that is identical to 'RAW.API,' or at least in the structure (I guess
I do not need the , `Content-Type` = structure(c("text/html",
"utf-8"), .Names = c("",
"charset"))) part).

When I investigate 'x.out' it also has the NAs. I've tried to fix
it, but I had to give up. It is strange because getting there seems so
easy (warning false logic!).

Here is what I got on my looong and alternative route in the hope that
someone on the list might be able to help

RAW.API - 
structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n,
`Content-Type` = structure(c(text/html, utf-8), .Names =
c(,charset)))

# I used an alternative way of converting it to a dataset to keep the
leading 0 in the id variables
x - read.table(file = textConnection(RAW.API ), header = TRUE, sep =
,, na.strings = , stringsAsFactors = FALSE, colClasses =character)
x

 # now put it back into the same string; write.csv does quote alphanumerics
write.csv(x, textConnection('output', 'w'), row.names = FALSE)
unlockBinding(output, env = .GlobalEnv)
# fixes the problem with the header
output[1] - gsub(\\\, , output[1])
# removes NAs
output - gsub(NA, \\, output)
# removes \ at the beginning of each line
output - gsub(^\\\, , output)
# removes an  at the end of each line
output - gsub(\\\$, , output)
# same as before
x.out - paste(output, collapse = '\n\')
# adds an line break at the end
x.out - gsub($, \n, x.out)

# so much manual gsub ...

Any help would be very much appreciated.

On Wed, Sep 12, 2012 at 5:54 PM, jim holtman jholt...@gmail.com wrote:
 This is close, but it does quote the header names, but does produce
 the same dataframe when read back in:

 RAW.API - 
 structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n,
  `Content-Type` = structure(c(text/html, utf-8), .Names = c(, 
 charset)))
 x - read.csv(textConnection(RAW.API), as.is = TRUE)
 x
   id event_arm namedob pushed_text pushed_calc complete
 1  1 event_1_arm_1 John 1979-05-01  NA2
 2  1 event_2_arm_1 John 2012-09-02 abc 1231
 3  1 event_3_arm_1 John 2012-09-10  NA2
 4  2 event_1_arm_1 Mary 1951-09-10 def 4562
 5  2 event_2_arm_1 Mary 1978-09-12  NA2

 # now put it back into the same string; write.csv does quote alphanumerics
 write.csv(x, textConnection('output', 'w'), row.names = FALSE)
 x.out - paste(output, collapse = '\n')
 # read it back in to show it is the same
 x.in - read.csv(textConnection(x.out), as.is = TRUE)
 x.in
   id event_arm namedob pushed_text pushed_calc complete
 1  1 event_1_arm_1 John 1979-05-01  NA2
 2  1 event_2_arm_1 John 2012-09-02 abc 1231
 3  1 event_3_arm_1 John 2012-09-10  NA2
 4  2 event_1_arm_1 Mary 1951-09-10 def 4562
 5  2 event_2_arm_1 Mary 1978-09-12  NA2



 On Wed, Sep 12, 2012 at 8:21 PM, Eric Fail eric.f...@gmx.us wrote:
 Dear R experts,

 I'm reading data from an online database via API and it gets delivered in 
 this messy comma separated structure,

 RAW.API - 
 structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n,
  `Content-Type` = structure(c(text/html, utf-8), .Names = c(, 
 charset)))

 I have this script that nicely parses it into a data frame,

 (df - read.table(file = textConnection(RAW.API), header = TRUE,
 sep = ,, na.strings = , stringsAsFactors = FALSE))
   id event_arm namedob pushed_text pushed_calc complete
 1  1 event_1_arm_1 John 1979-05-01NA  NA2
 2  1 event_2_arm_1 John 2012-09-02 abc 1231
 3  1 event_3_arm_1 John 2012-09-10NA  NA2
 4  2 event_1_arm_1 Mary 1951-09-10 def 4562
 5  2 event_2_arm_1 Mary 1978-09-12NA  NA2

 I then do some calculations and write them to pushed_text and pushed_calc 
 whereafter I need to format the data back to the messy comma separated 
 structure

[R] Parsing back to API strcuture

2012-09-12 Thread Eric Fail
Dear R experts,

I'm reading data from an online database via API and it gets delivered in this 
messy comma separated structure,

 RAW.API - 
 structure(id,event_arm,name,dob,pushed_text,pushed_calc,complete\n\01\,\event_1_arm_1\,\John\,\1979-05-01\,\\,\\,2\n\01\,\event_2_arm_1\,\John\,\2012-09-02\,\abc\,\123\,1\n\01\,\event_3_arm_1\,\John\,\2012-09-10\,\\,\\,2\n\02\,\event_1_arm_1\,\Mary\,\1951-09-10\,\def\,\456\,2\n\02\,\event_2_arm_1\,\Mary\,\1978-09-12\,\\,\\,2\n,
  `Content-Type` = structure(c(text/html, utf-8), .Names = c(, 
 charset)))

I have this script that nicely parses it into a data frame,

 (df - read.table(file = textConnection(RAW.API), header = TRUE, 
sep = ,, na.strings = , stringsAsFactors = FALSE))
   id     event_arm name        dob pushed_text pushed_calc complete
 1  1 event_1_arm_1 John 1979-05-01        NA          NA        2
 2  1 event_2_arm_1 John 2012-09-02         abc         123        1
 3  1 event_3_arm_1 John 2012-09-10        NA          NA        2
 4  2 event_1_arm_1 Mary 1951-09-10         def         456        2
 5  2 event_2_arm_1 Mary 1978-09-12        NA          NA        2

I then do some calculations and write them to pushed_text and pushed_calc 
whereafter I need to format the data back to the messy comma separated 
structure it came in.

I imagine something like this,

 API.back - `some magic command`(df, ...)

 identical(RAW.API, API.back)
 [1] TRUE

Some command that can format my data from the data frame I made, df, back to 
the structure that the raw API-object came in, RAW.API.

Any help would be appreciated.

Thanks for reading.

Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] subtract a list of vectors from a list of data.frames in an elegant way

2012-03-31 Thread Eric Fail
a working solution to the problem,

 a <- DBquery[names(lookup)]

mother.of.lookup <- list()
for(string in names(a)) {
   a[[string]] <- names(a[[string]])
   mother.of.lookup[[string]] <- setdiff(a[[string]], lookup[[string]])
}

identical(mother.of.lookup, result)

It might not be the most elegant solution, but it works.

Best,
Eric

On Thu, Mar 29, 2012 at 4:07 AM, Jim Holtman jholt...@gmail.com wrote:

 ?setdiff

 Sent from my iPad

 On Mar 29, 2012, at 4:28, Eric Fail eric.f...@gmx.us wrote:

  Dear R experts,
 
  I've realized that it might not be possible to define a negative SELCET 
  statement in a SQL call so now I'm looking for the smoothest way to 
  generate a list of what I would like from my large database by first 
  pulling all the names with a query like this SELECT top 1 * FROM 
  your_table (thank you Bart Joosen for the idea) and then subtract the 
  variables I am not allow to pull manually ending up with a 'positive' 
  definition of what I want, something I can use in a SQL SELCT statement 
  (see my email on this list from yesterday for more on that).
 
  When I query the database for the variable names I get something similar to 
  'DBquery' in my working example below, but considerable longer with over 
  2400 hundred variables. As I only need to remove two or three variables I 
  would like to define a lookup table (like the list 'lookup' in my example) 
  and subtract that from my data base query. Now to my question. Is there a 
  way I can subtract one list from another? Like setoff or alike?
 
  I would like to end up with a list like the one shown in my example called 
  'result.' In short, I would like to subtract 'lookup' from 'DBquery' and 
  end up with 'result,' please note that 'result' is a list fo vecktors and 
  not a list of dataframes. In my real life example DBquery is considerable 
  longer so defining that by hand would make a really really long syntax.
 
  Hope someone know some smart function that I can use to solve my problem in 
  an elegant way.
 
  Thanks for reading.
 
  Erick
 
  ## begin R code ##
 
  DBquery     - list(tableA=data.frame(id = numeric(0), atwin = numeric(0), 
  atrout = numeric(0)),
                     tableB=data.frame(id = numeric(0), mq   = numeric(0), z 
  = numeric(0), m = numeric(0)),
                     tableC=data.frame(V1 = numeric(0), mfn   = numeric(0), 
  iiff = numeric(0)),
                     tableD=data.frame(id    = numeric(0), msf   = 
  numeric(0), oom  = numeric(0)))
 
  lookup     - list(tableA=   c('atwin', 'atrout'),
                    tableB=    c('m', 'z'),
                    tableC=    'ALL')
 
  ### ...
 
  result     - list(tableA= c('id'),
                     tableB= c('id', 'mq'),
                     tableC= c('V1', 'mfn', 'iiff'))
 
  __
  R-help@r-project.org mailing list
  https://stat.ethz.ch/mailman/listinfo/r-help
  PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
  and provide commented, minimal, self-contained, reproducible code.

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] subtract a list of vectors from a list of data.frames in an elegant way

2012-03-29 Thread Eric Fail
Dear R experts,

I've realized that it might not be possible to define a negative SELECT
statement in a SQL call, so now I'm looking for the smoothest way to generate a
list of what I would like from my large database by first pulling all the names
with a query like this: SELECT top 1 * FROM your_table (thank you Bart Joosen
for the idea), and then subtracting the variables I am not allowed to pull manually,
ending up with a 'positive' definition of what I want, something I can use in a
SQL SELECT statement (see my email on this list from yesterday for more on
that).

When I query the database for the variable names I get something similar to
'DBquery' in my working example below, but considerably longer, with over 2400
variables. As I only need to remove two or three variables, I would like
to define a lookup table (like the list 'lookup' in my example) and subtract
that from my database query. Now to my question: is there a way I can subtract
one list from another? Like setoff or the like?

I would like to end up with a list like the one shown in my example called
'result.' In short, I would like to subtract 'lookup' from 'DBquery' and end up
with 'result.' Please note that 'result' is a list of vectors and not a list
of data frames. In my real-life example DBquery is considerably longer, so
defining that by hand would make for really, really long syntax.

I hope someone knows of a smart function that I can use to solve my problem in an
elegant way.

Thanks for reading.

Erick

## begin R code ##

DBquery     - list(tableA=data.frame(id = numeric(0), atwin = numeric(0), 
atrout = numeric(0)),
                    tableB=data.frame(id = numeric(0), mq   = numeric(0), z = 
numeric(0), m = numeric(0)),
                    tableC=data.frame(V1 = numeric(0), mfn   = numeric(0), iiff 
= numeric(0)),
                    tableD=data.frame(id    = numeric(0), msf   = numeric(0), 
oom  = numeric(0)))

lookup     - list(tableA=   c('atwin', 'atrout'), 
                   tableB=    c('m', 'z'),
                   tableC=    'ALL')

### ...

result     - list(tableA= c('id'),
                    tableB= c('id', 'mq'),
                    tableC= c('V1', 'mfn', 'iiff'))

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] Is it possible to de-select with sqlQuery from the RODBC library?

2012-03-28 Thread Eric Fail
Thank you Bart for your idea, the thing is that I have a large number of
tables and I would like to avoid having to pull them at all.

I currently have a list that I use as a lookup table in a loop with an if
else statement to sort between tables I want to sqlFetch (take everything)
and tables where I sqlQuery (only want part of the table). The names of the
list itself constitute a positive definition of what tables I want to pull.

Here is a reduced illustrative example of what I am doing. My problem is
still that I would like to make a negative selection so I get everything
except 'V1010' and 'V1012' in table 3, and so forth (please see below).

##   illustrative R example   ##

q.lookup - list(Table3 =   c('V1010', 'V1012'),
  Table7 =   c('V1040', 'V1052'),
  Table9 =   'ALL')
dfn - list()

for(i in names(q.lookup)) {
  if (q.lookup[[i]][1]==ALL) {
 query - names(q.lookup[1])
 table.n - sqlFetch(mdbConnect, query)
  } else if (q.lookup[[i]][1]!=ALL) {
 query - paste(select, paste(q.lookup[[i]], collapse=, ), from,
names(q.lookup[i]))
 table.n - sqlQuery(mdbConnect, query)
  } else print(your SQL call is gone haywire, fix it in line 193-204)
  dfn[[i]] - table.n
}

###   end of illustrative R example   

I could use your solution, I think, but if at all possible I would prefer
to figure out how to make a negative SQL statement (I still imagine that
there is some reverse function of the SQL select statement somewhere out
there).

With high hopes.

Eric

On Wed, Mar 28, 2012 at 2:24 AM, Bart Joosen bartjoo...@hotmail.com wrote:

 What you can do: SELECT top 1 * FROM your_table;
 Use this selection to find all your column names in R
 then paste everything together without the names you don't want and then
 run
 your query.

 Bart

 --
 View this message in context:
 http://r.789695.n4.nabble.com/Is-it-possible-to-de-select-with-sqlQuery-from-the-RODBC-library-tp4511189p4511800.html
 Sent from the R help mailing list archive at Nabble.com.

 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide
 http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.


[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] Is it possible to de-select with sqlQuery from the RODBC library?

2012-03-27 Thread Eric Fail
Dear R-list,

I'm querying a M$ Access database with the sqlQuery function from the RODBC
library. As I cannot make a working example with a database, here is an
illustrative example,

library(RODBC)
mdbConnect-odbcConnectAccess(S:/data/ ... /databse.mdb)
data - sqlQuery(mdbConnect, select id, DOB, V1, V2, ..., V1009, V1011, V1013 
from someTable)

I want everything in the table (someTable), except 'V1010' and 'V1012,' but I 
can't figure out how to make a negative or reverse SQL select statement. I have 
a lot of someTables and I have two or three variables in each table that I do 
not want R to fetch,

Is there a way to define a reverse select in SQL? One would imagine it would 
look something like this,

data - sqlQuery(mdbConnect, deselect V1010, V1o12 from someTable)

Thanks,
Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] Sankey Diagrams in R

2012-03-19 Thread Eric Fail
Dear R-list,

I am trying to visualize where the dropout happens in our patient flow. We are 
currently using traditional flowcharts and it bothers me that I can't visualize 
both the percentage and the flow in one diagram.

The other day I came across some interesting diagrams doing exactly what I
wanted: they had both flow and percentages visualized on one diagram. Here are
some nice examples apparently made with ‘sankeypython’:
http://www.sankey-diagrams.com/tag/software/

It didn't take long to find a blog where an R user (thanks!) had posted an R
script that actually produces a Sankey diagram in R:
http://biologicalposteriors.blogspot.com/2010/07/sankey-diagrams-in-r.html

See below for working example.

My questions are: is this the most up-to-date Sankey diagram script we have in the
R community? Is there a better way to visualize flow and percentages in one
diagram in R?

Thanks,
Eric

## the working example

## th, 
https://tonybreyal.wordpress.com/2011/11/24/source_https-sourcing-an-r-script-from-github/
sourc.https - function(url, ...) {
  # load package
require(RCurl)
  # install.packages(c(RCurl), dependencies = TRUE)

  # parse and evaluate each .R script
  sapply(c(url, ...), function(u) {
    eval(parse(text = getURL(u, followlocation = TRUE, cainfo = 
system.file(CurlSSL, cacert.pem, package = RCurl))), envir = .GlobalEnv)
  })
}

# Example from https://gist.github.com/1423501
sourc.https(https://raw.github.com/gist/1423501/55b3c6f11e4918cb6264492528b1ad01c429e581/Sankey.R;)

# My example (there is another example inside Sankey.R):
inputs = c(6, 144)
losses = c(6,47,14,7, 7, 35, 34)
unit = n =
labels = c(Transfers,
   Referrals\n,
   Unable to Engage,
   Consultation only,
   Did not complete the intake,
   Did not engage in Treatment,
   Discontinued Mid-Treatment,
   Completed Treatment,
   Active in \nTreatment)
SankeyR(inputs,losses,unit,labels)

# Clean up my mess
rm(inputs, labels, losses, SankeyR, sourc.https, unit)

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] looping over string of frames when importing with 'sqlFetch' from a Microsoft Access database

2012-02-24 Thread Eric Fail
Dear R-list,

I am trying to import (all) frames from a Microsoft Access database as
individual data frames in a fancy loop, but I'm having trouble figuring out
how to use 'sqlFetch' from the RODBC package in a loop (mostly because I
can't figure out how to loop over elements; I came from Stata).

I would very much appreciate it if anyone on the list could help me solve this
problem. As it is an issue of connecting to a database, I can't really make a
working example; please bear with me.

### not-working R code ###

## first I establish a connection to my database
mdbConnect-odbcConnectAccess(C:\\... \\database.mdb)

## then I read of all the table names
stringTables - sqlTables(mdbConnect, tableType=c(TABLE))$TABLE_NAME

## and then I meet the wall ...
for(i.Frame in stringTables) {
    i.Frame - sqlFetch(mdbConnect, i.Frame)
}
## this broken loop creates one data frame called 'i.Frame'
## containing the last frame in 'stringTables.' I'm not doing this correctly.

## the final step.
DF - stringTables[[1]]
for ( .df in stringTables) {
  DF -merge(DF,.df, by.x=uniqueid, by.y=uniqueid, all=T)
 }

### end of not-working R code ###

Thanks,
Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] looping over string of frames when importing with 'sqlFetch' from a Microsoft Access database

2012-02-24 Thread Eric Fail
Problem solved thanks to Peter Langfelder's response to Adel ESSAFI.

This is what should be in the loop,

dfn = list();
for (i in length(stringTables) {
dfn[[i]] <-
sqlFetch(mdbConnect, stringTables[[i]])
}

Thanks,
Eric

On Fri, Feb 24, 2012 at 3:19 PM, Eric Fail eric.f...@gmx.us wrote:

 Dear R-list,

 I am trying to import (all) frames from a Microsoft Access database as
 individual data frames in a fancy loop, but I'm having troubles figuring
 out how to use the 'sqlFetch' from the RODBS package in a loop (mostly
 because I can't figure out how to loop over elements (I came from stata)

 I would very much appreciate if anyone on the list could help me solve
 this problem, as it is an issue of connecting to a database I can't really
 make a working example, please bear with me.

 ### not-working R code ###

 ## first I establish a connection to my database
 mdbConnect-odbcConnectAccess(C:\\... \\database.mdb)

 ## then I read of all the table names
 stringTables - sqlTables(mdbConnect, tableType=c(TABLE))$TABLE_NAME

 ## and then I meet the wall ...
 for(i.Frame in stringTables) {
 i.Frame - sqlFetch(mdbConnect, i.Frame)
 }
 ## this broken loop creates one data frame called containing the 'i.Frame'
 containing the last frame in the 'stringTables.' I'm not doing this correct.

 ## the final step.
 DF - stringTables[[1]]
 for ( .df in stringTables) {
   DF -merge(DF,.df, by.x=uniqueid, by.y=uniqueid, all=T)
  }

 ### end of not-working R code ###

 Thanks,
 Eric

 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide
 http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.


[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] looping over string of frames when importing with 'sqlFetch' from a Microsoft Access database

2012-02-24 Thread Eric Fail
correction, this is the working R script, forgot '1: ... )',

dfn = list();
for (i in 1:length(stringTables)) {
dfn[[i]] <- sqlFetch(mdbConnect, stringTables[[i]])
}

On Fri, Feb 24, 2012 at 4:48 PM, Eric Fail eric.f...@gmx.us wrote:

 Problem solved thanks to Peter Langfelder's response to Adel ESSAFI.

 This is what should be in the loop,

 dfn = list();
 for (i in length(stringTables) {
 dfn[[i]] <- sqlFetch(mdbConnect, stringTables[[i]])
 }

 Thanks,
 Eric

 On Fri, Feb 24, 2012 at 3:19 PM, Eric Fail eric.f...@gmx.us wrote:

 Dear R-list,

 I am trying to import (all) frames from a Microsoft Access database as
 individual data frames in a fancy loop, but I'm having trouble figuring
 out how to use 'sqlFetch' from the RODBC package in a loop (mostly
 because I can't figure out how to loop over elements; I came from Stata).

 I would very much appreciate if anyone on the list could help me solve
 this problem, as it is an issue of connecting to a database I can't really
 make a working example, please bear with me.

 ### not-working R code ###

 ## first I establish a connection to my database
 mdbConnect-odbcConnectAccess(C:\\... \\database.mdb)

 ## then I read of all the table names
 stringTables - sqlTables(mdbConnect, tableType=c(TABLE))$TABLE_NAME

 ## and then I meet the wall ...
 for(i.Frame in stringTables) {
 i.Frame - sqlFetch(mdbConnect, i.Frame)
 }
 ## this broken loop creates one data frame called 'i.Frame'
 containing the last frame in 'stringTables'. I'm not doing
 this correctly.

 ## the final step.
 DF - stringTables[[1]]
 for ( .df in stringTables) {
   DF -merge(DF,.df, by.x=uniqueid, by.y=uniqueid, all=T)
  }

 ### end of not-working R code ###

 Thanks,
 Eric

 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide
 http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.




[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] plotting and coloring longitudinal data with three time points (ggplot2)

2011-12-07 Thread Eric Fail
 Dear list,

I have been struggling with this for some time now, and for the last hour I 
have been struggling to make a working example for the list. I hope someone out 
there has some experience with plotting longitudinal data that they will share.

My data is some patient data with three different time stamps. First the 
patients are identified at different times (first time stamp). Second, they go 
through an assessment phase and begin their treatment (time stamp 2). Finally 
they are admitted from the hospital at some point (time stamp 3),

I would like to make a spaghetti plot with the assessment phase in one color 
and the treatment phase in another color.

I used ggplot2, and with this example data and only two time points; it works 
fine (I call it my working example),

library(ggplot2)
df - data.frame( 
  date = seq(Sys.Date(), len=104, by=1 day)[sample(104, 52)], 
   patient = factor(rep(1:26, 2), labels = LETTERS)
 ) 
df - df[order(df$date), ] 
dt - qplot(date, patient, data=df, geom=line) 
dt + scale_x_date()
df[ which(df$patient=='E'), c(patient, date)]

But, if I have three time points, R, for some reason I do not yet understand, 
adds the two second time points in some funny way.

Finally, when that is solved; how do I colorize the different parts of the line 
so the assessment phase gets one color and the treatment phase another?

I want to be able to show how long we have been in contact with our patients, 
how much of the contact time that was assessment and how much that was actual 
treatment.

Below is an example (I call it the not-working example)

df2 - data.frame( 
  date2 = seq(Sys.Date(), len= 156, by=2 day)[sample(156, 78)], 
  patient2 = factor(rep(1:26, 3), labels = LETTERS)
 )

df2 - df2[order(df2$date2), ] 
dt2 - qplot(date2, patient2, data=df2, geom=line) 
dt2 + scale_x_date(major=months, minor=weeks) 
df2[ which(df2$patient2=='B'), c(patient2, date2)]

If someone can point me in a direction or tell me what I am doing wrong or if 
there is some amazing package for plotting longitudinal data I would be very 
grateful.

Thanks,
Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] plotting and coloring longitudinal data with three time points (ggplot2)

2011-12-07 Thread Eric Fail
Thank you for solving my problem, it worked out beautifully.

This was exactly what I was looking for, the ggplot2 package keeps
impressing me.

Thanks,
Eric

On Wed, Dec 7, 2011 at 6:01 AM, Hadley Wickham had...@rice.edu wrote:
 On Wed, Dec 7, 2011 at 4:02 AM, Eric Fail eric.f...@gmx.us wrote:
  Dear list,

 I have been struggling with this for some time now, and for the last hour I 
 have been struggling to make a working example for the list. I hope someone 
 out there have some experience with plotting longitudinal data that they 
 will share.

 My data is some patient data with three different time stamps. First the 
 patients are identified at different times (first time stamp). Second, they 
 go through an assessment phase and begin their treatment (time stamp 2). 
 Finally they are admitted from the hospital at some point (time stamp 3),

 I would like to make a spaghetti plot with the assessment phase in one color 
 and the treatment phase in another color.

 I used ggplot2, and with this example data and only two time points; it 
 works fine (I call it my working example),

 library(ggplot2)
 df - data.frame(
   date = seq(Sys.Date(), len=104, by=1 day)[sample(104, 52)],
    patient = factor(rep(1:26, 2), labels = LETTERS)
  )
 df - df[order(df$date), ]
 dt - qplot(date, patient, data=df, geom=line)
 dt + scale_x_date()
 df[ which(df$patient=='E'), c(patient, date)]

 But, if I have three time points, R, for some reason I do not yet 
 understand, add the two second time points in some funny way.

 Finally, when that is solved; how do I colorize the different parts of the 
 line so the assessment phase gets one color and the treatment phase another?

 I want to be able to show how long we have been in contact with our 
 patients, how much of the contact time that was assessment and how much that 
 was actual treatment.

 Below is an example (I call it the not-working example)

 df2 - data.frame(
   date2 = seq(Sys.Date(), len= 156, by=2 day)[sample(156, 78)],
   patient2 = factor(rep(1:26, 3), labels = LETTERS)
  )

 df2 - df2[order(df2$date2), ]
 dt2 - qplot(date2, patient2, data=df2, geom=line)
 dt2 + scale_x_date(major=months, minor=weeks)
 df2[ which(df2$patient2=='B'), c(patient2, date2)]

 Did you mean something like this?

 library(ggplot2)
 library(plyr)

 df2 - data.frame(
  date2 = seq(Sys.Date(), len= 156, by=2 day)[sample(156, 78)],
  patient2 = factor(rep(1:26, 3), labels = LETTERS)
 )

 df2 - ddply(df2, patient2, mutate, visit = order(date2))

 qplot(date2, patient2, data = df2, geom = line) +
  geom_point(aes(colour = factor(visit)))

 # or this?

 library(ggplot2)
 library(plyr)

 df2 - data.frame(
  date2 = seq(Sys.Date(), len= 156, by=2 day)[sample(156, 78)],
  patient2 = factor(rep(1:26, 3), labels = LETTERS)
 )

 df2 - ddply(df2, patient2, mutate, visit = order(date2))

 qplot(date2, patient2, data = df2, geom = line, colour =
 factor(visit), group = patient2)

 # Obviously the lines are drawn between the observations so you only
 see the first two visits.

 Hadley

 --
 Assistant Professor / Dobelman Family Junior Chair
 Department of Statistics / Rice University
 http://had.co.nz/

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] merge with origin information in new variable names

2011-04-25 Thread Eric Fail
Is there anyone out there who can suggest a way to solve this problem?

Thanks,
Esben

On Sun, Apr 24, 2011 at 8:53 PM, Jeff Newmiller
jdnew...@dcn.davis.ca.us wrote:
 Merge only lets you combine two tables at a time, but it does have a
 suffix argument that is intended to address your concern, but only for
 variable names that would conflict.

 In your example, the id variables are all sequenced exactly the same, so you
 could actually use cbind rather than merge.

 However, whether you use merge or cbind, I think the most direct route to
 your desired result is to rename the data columns before you combine them,
 using the names function on the left hand side of an assignment with a
 vector of new names on the right.
 ---
 Jeff Newmiller The . . Go Live...
 DCN:jdnew...@dcn.davis.ca.us Basics: ##.#. ##.#. Live Go...
 Live: OO#.. Dead: OO#.. Playing
 Research Engineer (Solar/Batteries O.O#. #.O#. with
 /Software/Embedded Controllers) .OO#. .OO#. rocks...1k
 ---
 Sent from my phone. Please excuse my brevity.

 Eric Fail eric.f...@gmx.com wrote:

 Dear R-list,

 Here is my simple question,

 I have n data frames that I would like to merge, but I can't figure out
 how to add information about the origin of the variable(s).

 Here is my problem,

 DF.wave.1 - data.frame(id=1:10,var.A=sample(letters[1:4],10,TRUE))
 DF.wave.2 - data.frame(id=1:10,var.M=sample(letters[5:8],10,TRUE))
 DF.wave.3 - data.frame(id=1:10,var.A=sample(letters[5:8],10,TRUE))

 Now; I would like to merge the three dataframes into one, but append a
 suffix to the individual variables names about thir origin.

 DF.wave.all - merge(DF.wave.1,DF.wave.2,DF.wave.3,by=id, [what to do
 here])

 In other words, I would like it to loook like this.

 DF.wave.all
id var.A.wave.1 var.M.wave.2 var.A.wave.3
 1   1chj
 2   2cej
 3   3cgk
 4   4cej
 5   5cgi
 6   6dek
 7   7chk
 8   8bgj
 9   9bfi
 10 10dhi


 Is there a command I can use directly in merge? 'suffixes' isn't really
 handy here.

 Thanks,
 Eric
 
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide
 http://www.R-project.org/posting-guide.html and provide commented, minimal,
 self-contained, reproducible code.


__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] merge with origin information in new variable names

2011-04-24 Thread Eric Fail
Dear R-list,

Here is my simple question,

I have n data frames that I would like to merge, but I can't figure
out how to add information about the origin of the variable(s).

Here is my problem,

DF.wave.1 - data.frame(id=1:10,var.A=sample(letters[1:4],10,TRUE))
DF.wave.2 - data.frame(id=1:10,var.M=sample(letters[5:8],10,TRUE))
DF.wave.3 - data.frame(id=1:10,var.A=sample(letters[5:8],10,TRUE))

Now, I would like to merge the three data frames into one, but append a
suffix to the individual variable names indicating their origin.

DF.wave.all - merge(DF.wave.1,DF.wave.2,DF.wave.3,by=id, [what to do here])

In other words, I would like it to look like this.

DF.wave.all
   id var.A.wave.1 var.M.wave.2 var.A.wave.3
1   1chj
2   2cej
3   3cgk
4   4cej
5   5cgi
6   6dek
7   7chk
8   8bgj
9   9bfi
10 10dhi


Is there a command I can use directly in merge? 'suffixes' isn't
really handy here.

Thanks,
Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] Parsing question, partly comma separated partly underscore separated string

2011-03-07 Thread Eric Fail
Thanks to Gabor Grothendieck and Dennis Murphy I can now solve the first
part of my problem and already impress my colleagues with the
R-program below (I know it could be written in a smarter way, but I am
learning). It reads my partly comma separated partly underscore
separated string and cleans it up in a very neat way.

Regardless of my inability to write tight code I moved on to the
second part of my quest, to put it all in to a loop to be able to loop
over my approximately 100 .txt files in /usr2/username/data/ I got
started with list.files() and my loop is more or less working, but I
got stuck on the last cbind part.

Is there a friendly R-hacker out there that would be willing to take a
look at my loop below*2?

Thanks,
Eric

###
####
##   The answer to the first part  of my question   ##
####
###

Line - readLines(file(/usr2/efail/data/example.txt))
s - strsplit(Line, ZZ_)[[1]]
s2 - sub(BLOCK.*, BLOCK, s)
s3 - sub(@9z.svg, , s2)
s4 - gsub(_, ,, s3)
s5 - read.table(textConnection(s4[1]), sep = ,)
DF - read.table(textConnection(s4), skip = 1, sep = ,, as.is = TRUE)
DF$block - head(cumsum(c(, DF$V8) == BLOCK)+1, -1)
DF$run - ave(DF$block, DF$block, FUN = seq_along)
DF$V8 - NULL
names(DF) - c(IngNam, Tx, Ty, Treatment, x, y, Y, BLOCK, RUN)
DF$ID - s5$V1
DF


#
##  ##
##   The PARTLY WORKING loop##
##  ##
#

fname - list.files(/usr2/efail/data,pattern=.txt, full.names =
TRUE, recursive =TRUE, ignore.case = TRUE)

for (sp in 1:length(fname)) {
Line - readLines(file(fname[sp]))
s - strsplit(Line, ZZ_)[[1]]
s2 - sub(BLOCK.*, BLOCK, s)
s3 - sub(@9z.svg, , s2)
s4 - gsub(_, ,, s3)
s5 - read.table(textConnection(s4[1]), sep = ,)
DF - read.table(textConnection(s4), skip = 1, sep = ,, as.is = TRUE)
DF$block - head(cumsum(c(, DF$V8) == BLOCK)+1, -1)
DF$run - ave(DF$block, DF$block, FUN = seq_along)
DF$V8 - NULL
names(DF) - c(IngNam, Tx, Ty, Treatment, x, y, Y, BLOCK, RUN)
DF$ID - s5$V1
FINAL.DF - cbind(DF… ## This is where I got stuck.
}


On Mon, Mar 7, 2011 at 8:18 AM, Gabor Grothendieck
ggrothendi...@gmail.com wrote:
 On Sun, Mar 6, 2011 at 10:13 PM, Eric Fail eric.f...@gmx.com wrote:
 Dear R-list,

 I have a partly comma separated partly underscore separated string that I am 
 trying to parse into R.

 Furthermore I have a bunch of them, and they are quite long. I have now 
 spent most of my Sunday trying to figure this out and thought I would try 
 the list to see if someone here would be able to get me started.

 My data structure looks like this,

 (in a example.txt file)
 Subject ID,ExperimentName,2010-04-23,32:34:23,Version 0.4, 640 by 960  
 pixels, On Device M, M, 
 3.2.4,zz_373_462_488_...@9z.svg,592,820,3.35,zz_032_288_436_...@9z.svg,332,878,3.66,zz_384_204_433_...@9z.svg,334,824,3.28,zz_365_575_683_...@9z.svg,598,878,3.50,zz_005_480_239_...@9z.svg,630,856,8.03,zz_030_423_394_...@9z.svg,98,846,4.09,zz_033_596_398_...@9z.svg,636,902,3.28,zz_263_064_320_...@9z.svg,570,894,1.26,bl...@9z.svg,322,842,32.96,zz_004_088_403_...@9z.svg,606,908,3.32,zz_703_546_434_...@9z.svg,624,934,2.58,zz_712_348_543_...@9z.svg,20,828,5.36,zz_005_48_239_...@9z.svg,580,830,4.36,zz_310_444_623_...@9z.svg,586,806,0.08,zz_030_423_394_...@9z.svg,350,854,3.84,zz_340_382_539_...@9z.svg,570,894,1.26,bl...@9z.svg,542,840,4.44,zz_345_230_662_...@9z.svg,632,844,2.47,zz_006_335_309_...@9z.svg,96,930,3.63,zz_782_346_746_...@9z.svg,306,850,2.58,zz_334_200_333_...@9z.svg,304,842,3.34,zz_383_506_726_...@9z.svg,622,884,3.84,zz_294_360_448_...@9z.svg,90,858,3.56,zz_334_335_473_...@9z.svg,570,894,1.26,bl...@9z.svg,320,852,4.04,
 (end of example.txt file)

 The above is approximate 5% of the length of a full file, and then I got 
 about 100 of them. Please note that the strings end with a comma.

 I am trying to parse it into something like this

 ID ImgNam BLOCK RUN Tx Ty Treatment x y Y
 Subject ID 373 1 1 462 488 TRT 592 820 3.35
 Subject ID 32 1 2 288 436 CON 332 878 3.66
 Subject ID 384 1 3 204 433 TRT 334 824 3.28
 Subject ID 365 1 4 575 683 TRT 598 878 3.5
 Subject ID 5 1 5 480 239 CON 630 856 8.03
 Subject ID 30 1 6 423 394 CON 98 846 4.09
 Subject ID 33 1 7 596 398 CON 636 902 3.28
 Subject ID 263 1 8 64 320 TRT 570 894 1.26
 Subject ID 4 2 1 88 403 CON 606 908 3.32
 Subject ID 703 2 2 546 434 CON 624 934 2.58
 Subject ID 712 2 3 348 543 CON 20 828 5.36
 Subject ID 5 2 4 48 239 CON 580 830 4.36
 Subject ID 310 2 5 444 623 TRT 586 806 0.08
 Subject ID 30 2 6 423 394 CON 350 854 3.84
 Subject ID 340 2 7 382 539 TRT 570 894 1.26
 Subject ID 345 3 1 230 662 TRT 632 844 2.47
 Subject ID 6 3 2 335 309 CON 96 930 3.63
 Subject ID 782 3 3 346

[R] Parsing question, partly comma separated partly underscore separated string

2011-03-06 Thread Eric Fail
Dear R-list,

I have a partly comma separated partly underscore separated string that I am 
trying to parse into R.

Furthermore I have a bunch of them, and they are quite long. I have now spent 
most of my Sunday trying to figure this out and thought I would try the list to 
see if someone here would be able to get me started.

My data structure looks like this,

(in a example.txt file)
Subject ID,ExperimentName,2010-04-23,32:34:23,Version 0.4, 640 by 960  pixels, 
On Device M, M, 
3.2.4,zz_373_462_488_...@9z.svg,592,820,3.35,zz_032_288_436_...@9z.svg,332,878,3.66,zz_384_204_433_...@9z.svg,334,824,3.28,zz_365_575_683_...@9z.svg,598,878,3.50,zz_005_480_239_...@9z.svg,630,856,8.03,zz_030_423_394_...@9z.svg,98,846,4.09,zz_033_596_398_...@9z.svg,636,902,3.28,zz_263_064_320_...@9z.svg,570,894,1.26,bl...@9z.svg,322,842,32.96,zz_004_088_403_...@9z.svg,606,908,3.32,zz_703_546_434_...@9z.svg,624,934,2.58,zz_712_348_543_...@9z.svg,20,828,5.36,zz_005_48_239_...@9z.svg,580,830,4.36,zz_310_444_623_...@9z.svg,586,806,0.08,zz_030_423_394_...@9z.svg,350,854,3.84,zz_340_382_539_...@9z.svg,570,894,1.26,bl...@9z.svg,542,840,4.44,zz_345_230_662_...@9z.svg,632,844,2.47,zz_006_335_309_...@9z.svg,96,930,3.63,zz_782_346_746_...@9z.svg,306,850,2.58,zz_334_200_333_...@9z.svg,304,842,3.34,zz_383_506_726_...@9z.svg,622,884,3.84,zz_294_360_448_...@9z.svg,90,858,3.56,zz_334_335_473_...@9z.svg,570,894,1.26,bl...@9z.svg,320,852,4.04,
(end of example.txt file)

The above is approximate 5% of the length of a full file, and then I got about 
100 of them. Please note that the strings end with a comma.

I am trying to parse it into something like this 

ID ImgNam BLOCK RUN Tx Ty Treatment x y Y
Subject ID 373 1 1 462 488 TRT 592 820 3.35
Subject ID 32 1 2 288 436 CON 332 878 3.66
Subject ID 384 1 3 204 433 TRT 334 824 3.28
Subject ID 365 1 4 575 683 TRT 598 878 3.5
Subject ID 5 1 5 480 239 CON 630 856 8.03
Subject ID 30 1 6 423 394 CON 98 846 4.09
Subject ID 33 1 7 596 398 CON 636 902 3.28
Subject ID 263 1 8 64 320 TRT 570 894 1.26
Subject ID 4 2 1 88 403 CON 606 908 3.32
Subject ID 703 2 2 546 434 CON 624 934 2.58
Subject ID 712 2 3 348 543 CON 20 828 5.36
Subject ID 5 2 4 48 239 CON 580 830 4.36
Subject ID 310 2 5 444 623 TRT 586 806 0.08
Subject ID 30 2 6 423 394 CON 350 854 3.84
Subject ID 340 2 7 382 539 TRT 570 894 1.26
Subject ID 345 3 1 230 662 TRT 632 844 2.47
Subject ID 6 3 2 335 309 CON 96 930 3.63
Subject ID 782 3 3 346 746 TRT 306 850 2.58
Subject ID 334 3 4 200 333 TRT 304 842 3.34
Subject ID 383 3 5 506 726 TRT 622 884 3.84
Subject ID 294 3 6 360 448 TRT 90 858 3.56
Subject ID 334 3 7 335 473 TRT 570 894 1.26

I could do it in Excel, but it would take me a week--and it would be stupid--if 
someone could please help me get started I would very much appreciate it. It 
would not only benefit me, but my colleagues would see the benefit of R and the 
R-list in particular.

Thanks in advance!

Eric

--

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] ggplot2, 'se' variable in geom_errorbar's limits?

2011-02-19 Thread Eric Fail
Can't anybody give me a hint on how to solve this? I even bought the
ggplot2-book, so you could also give a page (or a series of pages).

Thanks,
Eric

On Thu, Feb 17, 2011 at 10:19 AM, Eric Fail eric.f...@gmx.com wrote:

 Dear R-list

 I'm working with with geom_errorbar; specifically I'm trying to
 reproduce the example Hadley Wickham have on
 http://had.co.nz/ggplot2/geom_errorbar.html (all in the button of the
 page) where he makes an nice plot with errorbars and then draw lines
 between the points.

 What confuses me is the 'limits' he defines for the errorbars from the
 se variable.

 First he creates a dataset,

 df - data.frame(
  trt = factor(c(1, 1, 2, 2)),
  resp = c(1, 5, 3, 4),
  group = factor(c(1, 2, 1, 2)),
  se = c(0.1, 0.3, 0.3, 0.2)
 )

 # library(ggplot2)

 and then he creates some limits from the se variables.

 limits - aes(ymax = resp + se, ymin=resp - se)

 [elements omitted]

 # and then he creates the plot (I'm interested in).

 p - ggplot(df, aes(colour=group, y=resp, x=trt))
 p + geom_line(aes(group=group)) + geom_errorbar(limits, width=0.2)

 I can (of course) get Hadley's example to run, but I can't do it on my
 data as I don't have a 'se' variable/don't know how to create it. I
 have a group variable, a treatment variable, and a response variable,
 but no se variable.

 Could anyone out there explain how I create a 'se' variable in my data?

 I'm sure my reasoning is the one that is off, and not ggplot2 (I'm a big fan).

 Your help is appreciated!

 Thanks,
 Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] ggplot2, 'se' variable in geom_errorbar's limits?

2011-02-19 Thread Eric Fail
Hi Scott,

Thank you for taking the time to look at my problem!

I played around with your example and realized that in solving the
problem with limits by summarizing the data I lose the option to
split the data along some third variable, say the 'color' variable in
the diamonds data.

Any idea on how I can solve the problem directly in ggplot2? Any
ggplot2-experts out there?

Sincerely,
Eric


On Sat, Feb 19, 2011 at 4:51 PM, Scott Chamberlain
myrmecocys...@gmail.com wrote:
 require(ggplot2)

 data(diamonds)

 diamonds - diamonds[1:100,c(2,7)]

 # use ddply in plyr package (loaded with ggplot2) to get data to plot

 diamonds_df - ddply(diamonds, .(cut), summarise,

 mean_price = mean(price),

 se_price = sd(price)/sqrt(length(price))

 )

 limits - aes(ymax = mean_price + se_price, ymin = mean_price - se_price)

 ggplot(diamonds_df, aes(x = cut, y = mean_price)) +

 geom_point() +

 geom_errorbar(limits, width=0.2)

 Sincerely,

 Scott Chamberlain

 Rice University, EEB Dept.

 On Saturday, February 19, 2011 at 3:12 PM, Eric Fail wrote:

 Can't anybody give me a hint on how to solve this? I even bought the
 ggplot2-book, so you could also give a page (or a series of pages).

 Thanks,
 Eric

 On Thu, Feb 17, 2011 at 10:19 AM, Eric Fail eric.f...@gmx.com wrote:

 Dear R-list

 I'm working with with geom_errorbar; specifically I'm trying to
 reproduce the example Hadley Wickham have on
 http://had.co.nz/ggplot2/geom_errorbar.html (all in the button of the
 page) where he makes an nice plot with errorbars and then draw lines
 between the points.

 What confuses me is the 'limits' he defines for the errorbars from the
 se variable.

 First he creates a dataset,

 df - data.frame(
  trt = factor(c(1, 1, 2, 2)),
  resp = c(1, 5, 3, 4),
  group = factor(c(1, 2, 1, 2)),
  se = c(0.1, 0.3, 0.3, 0.2)
 )

 # library(ggplot2)

 and then he creates some limits from the se variables.

 limits - aes(ymax = resp + se, ymin=resp - se)

 [elements omitted]

 # and then he creates the plot (I'm interested in).

 p - ggplot(df, aes(colour=group, y=resp, x=trt))
 p + geom_line(aes(group=group)) + geom_errorbar(limits, width=0.2)

 I can (of course) get Hadley's example to run, but I can't do it on my
 data as I don't have a 'se' variable/don't know how to create it. I
 have a group variable, a treatment variable, and a response variable,
 but no se variable.

 Could anyone out there explain how I create a 'se' variable in my data?

 I'm sure my reasoning is the one that is off, and not ggplot2 (I'm a big
 fan).

 Your help is appreciated!

 Thanks,
 Eric

 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.



__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] ggplot2, 'se' variable in geom_errorbar's limits?

2011-02-19 Thread Eric Fail
Thank you Scott and Ista,

I really appreciate your help! I solved it with Scott's help on the ddply.

For the record, here is the working example that solves my initial question:

## install.packages(c(ggplot2, plyr))
require(ggplot2)
data(diamonds)
diamonds - diamonds[1:100,c(2,7)]

# use ddply in plyr package (loaded with ggplot2) to get data to plot
diamonds_df - ddply(diamonds, .(cut, color), summarise,
mean_price = mean(price),
se_price = sd(price)/sqrt(length(price))
)

limits - aes(ymax = mean_price + se_price, ymin = mean_price - se_price)

ggplot(diamonds_df, aes(colour= color, x = cut, y = mean_price)) +
geom_point() +
geom_line(aes(group= color)) +
geom_errorbar(limits, width=0.2)

Very grateful!

Eric


On Sat, Feb 19, 2011 at 6:17 PM, Scott Chamberlain
myrmecocys...@gmail.com wrote:
 Hi Eric,
 I would just include that third variable in the ddply call, for example:
 ddply(diamonds, .(cut, clarity, etc...), summarise,
 mean = mean(price,
 se = ...
 )
 where you can summarise by multiple variables within the .(x, y, etc.)\\
 I think that answers your question. Let me know if not. The example I sent
 earlier was just for simplicity.
 Scott

 On Saturday, February 19, 2011 at 4:58 PM, Eric Fail wrote:

 Hi Scott,

 Thank you for taking the time to look at my problem!

 I played around with your example and realized that in solving the
 problem with limits by summarizing the data I loose the option to
 split the data along some third variable, say the 'color' variable in
 the diamonds data.

 Any idea on how I can solve the problem directly in ggplot2? Any
 ggplot2-expects out there?

 Sincerely,
 Eric


 On Sat, Feb 19, 2011 at 4:51 PM, Scott Chamberlain
 myrmecocys...@gmail.com wrote:

 require(ggplot2)

 data(diamonds)

 diamonds - diamonds[1:100,c(2,7)]

 # use ddply in plyr package (loaded with ggplot2) to get data to plot

 diamonds_df - ddply(diamonds, .(cut), summarise,

 mean_price = mean(price),

 se_price = sd(price)/sqrt(length(price))

 )

 limits - aes(ymax = mean_price + se_price, ymin = mean_price - se_price)

 ggplot(diamonds_df, aes(x = cut, y = mean_price)) +

 geom_point() +

 geom_errorbar(limits, width=0.2)

 Sincerely,

 Scott Chamberlain

 Rice University, EEB Dept.

 On Saturday, February 19, 2011 at 3:12 PM, Eric Fail wrote:

 Can't anybody give me a hint on how to solve this? I even bought the
 ggplot2-book, so you could also give a page (or a series of pages).

 Thanks,
 Eric

 On Thu, Feb 17, 2011 at 10:19 AM, Eric Fail eric.f...@gmx.com wrote:

 Dear R-list

 I'm working with with geom_errorbar; specifically I'm trying to
 reproduce the example Hadley Wickham have on
 http://had.co.nz/ggplot2/geom_errorbar.html (all in the button of the
 page) where he makes an nice plot with errorbars and then draw lines
 between the points.

 What confuses me is the 'limits' he defines for the errorbars from the
 se variable.

 First he creates a dataset,

 df - data.frame(
  trt = factor(c(1, 1, 2, 2)),
  resp = c(1, 5, 3, 4),
  group = factor(c(1, 2, 1, 2)),
  se = c(0.1, 0.3, 0.3, 0.2)
 )

 # library(ggplot2)

 and then he creates some limits from the se variables.

 limits - aes(ymax = resp + se, ymin=resp - se)

 [elements omitted]

 # and then he creates the plot (I'm interested in).

 p - ggplot(df, aes(colour=group, y=resp, x=trt))
 p + geom_line(aes(group=group)) + geom_errorbar(limits, width=0.2)

 I can (of course) get Hadley's example to run, but I can't do it on my
 data as I don't have a 'se' variable/don't know how to create it. I
 have a group variable, a treatment variable, and a response variable,
 but no se variable.

 Could anyone out there explain how I create a 'se' variable in my data?

 I'm sure my reasoning is the one that is off, and not ggplot2 (I'm a big
 fan).

 Your help is appreciated!

 Thanks,
 Eric

 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.



__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] ggplot2, 'se' variable in geom_errorbar's limits?

2011-02-17 Thread Eric Fail
Dear R-list

I'm working with geom_errorbar; specifically I'm trying to
reproduce the example Hadley Wickham has on
http://had.co.nz/ggplot2/geom_errorbar.html (at the very bottom of the
page) where he makes a nice plot with errorbars and then draws lines
between the points.

What confuses me is the 'limits' he defines for the errorbars from the
se variable.

First he creates a dataset,

df - data.frame(
  trt = factor(c(1, 1, 2, 2)),
  resp = c(1, 5, 3, 4),
  group = factor(c(1, 2, 1, 2)),
  se = c(0.1, 0.3, 0.3, 0.2)
)

# library(ggplot2)

and then he creates some limits from the se variables.

limits - aes(ymax = resp + se, ymin=resp - se)

[elements omitted]

# and then he creates the plot (I'm interested in).

p - ggplot(df, aes(colour=group, y=resp, x=trt))
p + geom_line(aes(group=group)) + geom_errorbar(limits, width=0.2)

I can (of course) get Hadley's example to run, but I can't do it on my
data as I don't have a 'se' variable/don't know how to create it. I
have a group variable, a treatment variable, and a response variable,
but no se variable.

Could anyone out there explain how I create a 'se' variable in my data?

I'm sure my reasoning is the one that is off, and not ggplot2 (I'm a big fan).

Your help is appreciated!

Thanks,
Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] wildcard operator

2010-05-26 Thread Eric Fail

Hi Ruser

As usual I'm trying to replicate some SAS code. I would like to know  
if there is a wildcard operator in R, like   :  in SAS?


When running:

lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + ... + x9860, data=mydata)

I would like to be able to get around it by just writing something  
like this:


lm(y ~ x1:x9860, data=mydata)

Anyone?

Sorry for no including a working example, but I figured that it wasn't  
necessary.


Thanks

Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] big panel: filehash, bigmemory or other

2010-02-22 Thread Eric Fail

Dear R-list

I'm on my way to start a new project on a rather big panel, consisting  
of approximately 8 million observations in 30 waves of data and about  
15 variables. I have a similar data set that is approximately 7  
gigabytes in size.


Until now I have done my data management in SAS, and Stata, mostly  
identifying spells, counting events in intervals, and the like, but I  
would like to do the data management-and fitting my models-in R.


However, R can't handle the data in the normal R way; it's simply too big.  
So I thought of trying either filehash, bigmemory or some other  
similar package I haven't heard of (yet). The documentation for  
'bigmemory' says that the package is capable of ``basic  
manipulation'' on ``manageable subsets of the data'', but what does  
that actually mean?


Since learning this in R is a rather time-consuming process, and I
know SAS is capable of doing the data management and has the PROC
MIXED module, I wanted to ask on the list before I set out on this
odyssey.


Does anyone out there have any practical experience with data sets  
(panels) that size and maybe some experience fitting a model,  
presumably using the lmer package or alike, using filehash or  
bigmemory, that they would be willing to share?


Thanks in advance,
Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] ggplot2: ... seem to be overwriting each other

2010-01-31 Thread Eric Fail

Dear list

A week ago Dennis Murphy helped me out by showing me some nice ggplot2
tricks. Now I am stuck on a new problem that I can't solve (I have
ordered the ggplot2 book).


My problem is that I can't add my spline (or geom_smooth) and at the  
same time control the grid (using scale_x_continuous), they seem to  
overwrite each other.


I have continued the working example from my last question (http://n4.nabble.com/add-spline-to-longitudinal-data-preferably-similar-to-SAS-s-I-SM50S-routine-td1017138.html 
)


 example start 

tolerance.pp <- read.table("http://www.ats.ucla.edu/stat/R/examples/alda/tolerance1_pp.txt",
  sep=",", header=T)

# install.packages("ggplot2", dep = T)
library(ggplot2)

plot <- ggplot(tolerance.pp, aes(age, tolerance, group = id)) +
  geom_line()
plot + geom_smooth(aes(group = male, colour = male), size = 1, se =
  FALSE)

plot + scale_x_continuous(breaks = c(10, 12, 13, 15))

# plot + scale_x_continuous(limits = c(9, 16))

 example end 

I have added the 'plot + scale_x_continuous(limits = c(9, 16))' line since
this seems to conflict as well.


Thanks in advance!

Eric

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] ggplot2: ... seem to be overwriting each other

2010-01-31 Thread Eric Fail
Hi Dennis (cc Lucien Lemmens and the r-help)

Thank you for your help.

I got help from Mr. Lucien Lemmens as well, and thanks to him too
(again).

The thing is, if my working example is run line by line, it
overwrites ... or one thing overwrites the next. However, both you and
Lucien Lemmens solved my problem.

Lucien Lemmens' solution:

plot <- ggplot(tolerance.pp, aes(age, tolerance, group= id)) +
  geom_line()
plot <- plot+geom_smooth(aes(group=male,colour=male),size=1,se=FALSE)
plot + scale_x_continuous(breaks = c(10, 12, 13, 15))

I know it seems as if there is no purpose, but I have a huge dataset
(here) where the measures are at 6, 12, 20 and 24 (I think), which was
the reason for my question. Maybe I should have written that in the
mail.

Anyhow, here are two solutions for how to make the grid in a ggplot2
plot unequally spaced, if anyone should ever run into that problem again.

Thanks for all your help.

Eric

On 31/01/2010, at 17.51, Dennis Murphy wrote:

 Hi:

 I don't quite see what the problem is, but I
 can show you a couple of things that might be useful...

 (1) you want male to be a factor, but in the data set, it takes  
 integer
  values; therefore, you need to redefine it as a factor:
  str(tolerance.pp)
 'data.frame':   80 obs. of  6 variables:
  $ id   : int  9 9 9 9 9 45 45 45 45 45 ...
  $ age  : int  11 12 13 14 15 11 12 13 14 15 ...
  $ tolerance: num  2.23 1.79 1.9 2.12 2.66 1.12 1.45 1.45 1.45  
 1.99 ...
  $ male : int  0 0 0 0 0 1 1 1 1 1 ...
  $ exposure : num  1.54 1.54 1.54 1.54 1.54 1.16 1.16 1.16 1.16  
 1.16 ...
  $ time : int  0 1 2 3 4 0 1 2 3 4 ...
  tolerance.pp$male <- factor(tolerance.pp$male, labels = c('F', 'M'))
  str(tolerance.pp)
 'data.frame':   80 obs. of  6 variables:
  $ id   : int  9 9 9 9 9 45 45 45 45 45 ...
  $ age  : int  11 12 13 14 15 11 12 13 14 15 ...
  $ tolerance: num  2.23 1.79 1.9 2.12 2.66 1.12 1.45 1.45 1.45  
 1.99 ...
  $ male : Factor w/ 2 levels "F","M": 1 1 1 1 1 2 2 2 2 2 ...
  $ exposure : num  1.54 1.54 1.54 1.54 1.54 1.16 1.16 1.16 1.16  
 1.16 ...
  $ time : int  0 1 2 3 4 0 1 2 3 4 ...

 This has an impact on the legend, as you'll see below.

 (2) I set scale_x_continuous(breaks = NA) in the initial setup and  
 redefined
  it later on. There's probably a more efficient way, but it works.
 (3) I thought that xlim(9, 16) would extend the limits of the x- 
 axis, but it
  had no effect on the plot. What you'll see below are x-ticks at  
 12, 13 and 15,
  although I don't see the purpose of it other than to see that  
 it can be done...

 plot <- ggplot(tolerance.pp, aes(age, tolerance, group = id)) +
 xlim(9, 16) +
 scale_x_continuous(breaks = NA)
 plot + geom_line() +
  geom_smooth(aes(group = male, colour = male), size = 1.2,  
 se = FALSE) +
  scale_x_continuous(breaks = c(10, 12, 13, 15)) +
  scale_colour_hue("gender")

 The last line changes the legend title to something a little more  
 evocative.

 HTH,
 Dennis


 On Sun, Jan 31, 2010 at 1:28 PM, Eric Fail e...@it.dk wrote:
 Dear list

 A week ago Dennis Murphy helped me out by showing me some nice  
 ggplot2 tricks . Now I got stuck in a new problem that I can't solve  
 (I have ordered the ggplot2-book).

 My problem is that I can't add my spline (or geom_smooth) and at the  
 same time control the grid (using scale_x_continuous), they seem to  
 overwrite each other.

 I have continued the working example from my last question 
 (http://n4.nabble.com/add-spline-to-longitudinal-data-preferably-similar-to-SAS-s-I-SM50S-routine-td1017138.html
  
 )

  example start 

 tolerance.pp <-
 read.table("http://www.ats.ucla.edu/stat/R/examples/alda/tolerance1_pp.txt",
 sep=",", header=T)
 # install.packages("ggplot2", dep = T)
 library(ggplot2)

 plot <- ggplot(tolerance.pp, aes(age, tolerance, group = id)) +
 geom_line()
 plot + geom_smooth(aes(group = male, colour = male), size = 1, se =  
 FALSE)
 plot + scale_x_continuous(breaks = c(10, 12, 13, 15))

 # plot + scale_x_continuous(limits = c(9, 16))

  example end 

 I have added the 'plot + scale_x_continuous(limits = c(9, 16)) '  
 since this seem to conflict as well.

 Thanks in advance!

 Eric

 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.



[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


[R] add spline to longitudinal data - preferably similar to SAS's 'I=SM50S' routine

2010-01-18 Thread Eric Fail

Hi Ruser

I'm trying to replicate some SAS code. I have to add a spline to my  
longitudinal spaghetti plot.


I have the plot, but I can't add the spline, an overall trend line. In
the SAS code they use the command 'I=SM50S' and I would prefer
something similar. I’m using R 2.10.1 on Windows XP…


I have made this working example.

tolerance.pp <- read.table("http://www.ats.ucla.edu/stat/R/examples/alda/tolerance1_pp.txt",
  sep=",", header=T)

# install.packages("lattice", dep = T)
library(lattice)
xyplot(tolerance ~ age, groups = id, data=tolerance.pp, type = "l")

This is where I want to add an overall spline.

Hope someone out there can figure this out.

Thanks

Eric
__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


Re: [R] add spline to longitudinal data - preferably similar to SAS's 'I=SM50S' routine

2010-01-18 Thread Eric Fail
Hi Dennis

Works like a charm. It's some of the best help I have ever received.

Very thankful!

Eric

On 18/01/2010, at 19.12, Dennis Murphy wrote:

 Hi:

 There is a very similar example in the ggplot book by Hadley Wickham  
 (section 4.5, pp. 50-52). Here's
 one approach using ggplot:

 library(ggplot2)
 p <- ggplot(tolerance.pp, aes(age, tolerance, group = id)) +
 geom_line()
 p + geom_smooth(aes(group = 1), size = 2)

 The second command adds a smoothing spline in blue, with twice the  
 line width as the
 individual spaghetti plots, and by default, a confidence envelope   
 around it. To get rid
 of the envelope, include se = FALSE as an argument to geom_smooth();  
 to change the
 color, add the argument colour = 'red', for example.

 HTH,
 Dennis

 On Mon, Jan 18, 2010 at 3:25 PM, Eric Fail e...@it.dk wrote:
 Hi Ruser

 I'm trying to replicate some SAS code. I have to add a spline to my  
 longitudinal spaghetti plot.

 I have the plot, but I can't add the spline, a overall trend line.  
 In the SAS code they use the command   'I=SM50S' and I would prefer  
 something similar. I’m using R 2.10.1 on windows XP…

 I have made this working example.

 tolerance.pp <-
 read.table("http://www.ats.ucla.edu/stat/R/examples/alda/tolerance1_pp.txt",
 sep=",", header=T)
 # install.packages("lattice", dep = T)
 library(lattice)
 xyplot(tolerance ~ age, groups = id, data=tolerance.pp, type = "l")

 This is where I want to add a overall spline.

 Hope someone out there can figure this out.

 Thanks

 Eric
 __
 R-help@r-project.org mailing list
 https://stat.ethz.ch/mailman/listinfo/r-help
 PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
 and provide commented, minimal, self-contained, reproducible code.



[[alternative HTML version deleted]]

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.