subject:"\[R\] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work"

Re: [R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

2007-03-08 Thread ALAN SMITH

Hello R-users
The help I received from Petr helped me create this solution to my problem.

t1 <- with(mydata, aggregate(mydata$Y,
list(mydata$time, mydata$treatment, mydata$expREP, mydata$techREP),
median, na.rm=T)) ### find median by factors

colnames(t1) <- c("time","treatment","expREP","techREP","Y50") ### column name ##

newdata <- merge(mydata, t1, by.x= names(mydata)[2:5],
by.y=names(t1)[1:4], all=T)

Thank you,
Alan







###
Message: 97
Date: Thu, 08 Mar 2007 08:00:53 +0100
From: Petr Pikal [EMAIL PROTECTED]
Subject: Re: [R] how to apply functions to unbalanced data in long
   format  by  factors..cant get by or aggregate to work
To: ALAN SMITH [EMAIL PROTECTED], r-help@stat.math.ethz.ch
Message-ID: [EMAIL PROTECTED]
Content-Type: text/plain; charset=US-ASCII

Hi

you can use aggregate to create table of medians

with(mydata, aggregate(Y, list(time, treatment, expREP), median))

repeats of unique factors
either by rle or aggregate with length function

Then you can do replication by

norep <- rep(your.median, each = your.replicates)

Regards
Petr



submitted question abridged
 Hello R users,



 #Example data frame##
 mydata <- as.data.frame(structure(list(cpdID = c(7, 7, 7, 7, 7, 7, 8, 8,
 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10,
 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
 10, 10, 10, 10, 10, 10, 19, 19, 19, 19, 19, 19, 23, 23, 23, 23, 23,
 23, 23, 23, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
 33, 33, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
 40, 40, 40, 40, 40, 40, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 47, 47,
 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
 47, 47), time = structure(as.integer(c(1, 1, 1, 1, 2, 2, 2, 1, 1, 1,
 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2,
 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2,
 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1,
 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1,
 1, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2,
 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1,
 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2,
 2, 1, 2, 2, 1, 2)), .Label = c("120hr", "24hr"), class = "factor"),
 treatment = structure(as.integer(c(1, 1, 1, 2, 2, 1, 1, 2,
 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1,
 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2,
 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1,
 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2,
 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2,
 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2,
 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1,
 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2,
 2, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1,
 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1,
 2, 1)), .Label = c("control", "trt"), class = "factor"),
 expREP = structure(as.integer(c(1, 1, 1, 3, 1, 1, 1, 1, 2,
 2, 1, 1, 2, 2, 1, 3, 1, 3, 3, 3, 1, 2, 1, 2, 2, 2, 2, 3,
 3, 2, 2, 1, 2, 3, 3, 1, 1, 2, 3, 1, 3, 3, 3, 3, 1, 3, 1,
 1, 2, 1, 1, 2, 3, 2, 2, 1, 3, 2, 2, 2, 3, 2, 1, 2, 2, 2,
 2, 1, 1, 1, 3, 2, 2, 3, 3, 2, 2, 2, 3, 2, 3, 2, 3, 1, 2,
 3, 3, 1, 1, 1, 3, 3, 1, 1, 3, 1, 1, 1, 1, 1, 3, 3, 3, 1,
 1, 1, 2, 3, 2, 2, 3, 2, 2, 2, 1, 1, 1, 3, 3, 2, 2, 2, 1,
 3, 1, 2, 3, 1, 3, 3, 1, 2, 3, 1, 2, 1, 3, 1, 3, 3, 2, 2,
 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 3, 1, 1, 1,
 1, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 1, 1, 1, 1, 3,
 3, 1, 1, 1, 3, 2, 1, 1, 2, 1, 3, 2, 1, 2, 1, 3, 1, 1, 2,
 3)), .Label = c("expREP1", "expREP2", "expREP3"), class =
 "factor"), techREP = structure(as.integer(c(3, 2, 1, 1, 1, 3, 1,
 3, 3, 2, 2, 1, 1, 3, 2, 3, 3, 1, 2, 1, 2, 1, 3, 1, 3, 2, 2, 3, 1,
 1, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 3, 1, 2, 3, 1, 3, 2, 1,
 1, 2, 2, 3, 3, 3, 2, 1, 2, 1, 2, 3, 2, 3, 2, 3, 1, 3, 2, 2, 1, 2,
 1, 3, 2, 2, 1, 1, 3, 3, 3, 1, 3, 1, 3, 2, 2, 2, 1, 2, 1, 3, 1, 3,
 2, 1, 3, 1, 3, 2, 3, 1, 1, 2, 3, 1, 1, 3, 3, 2, 2, 1, 2, 3, 2, 2,
 3, 2, 2, 1, 2, 2, 3, 3, 3, 1, 1, 1, 3, 1, 1, 2, 3, 2, 3, 3, 1, 1,
 3, 2, 3, 3, 3, 3, 1, 3, 2, 1, 3, 3, 1, 3, 2, 1, 2, 2, 1, 2, 1, 2,
 1, 1, 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, 3, 3, 1, 2, 2, 1, 3, 2, 3, 3,
 2, 2, 2, 3, 2, 1, 3, 1, 3, 1, 3, 1, 1, 1, 2, 2, 3)), .Label =
 c("techREP1", "techREP2", "techREP3"), class = "factor"), log2Abun
 =

Re: [R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

2007-03-08 Thread hadley wickham

 Hello R-users
 The help I received from Petr helped me created this solution to my problems.

 t1 <- with(mydata, aggregate(mydata$Y,
 list(mydata$time, mydata$treatment, mydata$expREP, mydata$techREP),
 median, na.rm=T)) ### find median by factors

 colnames(t1) <- c("time","treatment","expREP","techREP","Y50") ### column name
 ##

 newdata <- merge(mydata, t1, by.x= names(mydata)[2:5],
 by.y=names(t1)[1:4], all=T)


Another way is to use the reshape package, http://had.co.nz/reshape

library(reshape)
molten <- melt(mydata, m="log2Abun")

cast(molten, time + treatment +  expREP + techREP ~ ., median)

# You can also create many other shapes easily:
cast(molten, expREP + techREP ~ time + treatment , median)
cast(molten, expREP + techREP ~ time + treatment , median, margins=TRUE)

Hadley

__
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

[R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

2007-03-07 Thread ALAN SMITH

Hello R users,

Problem...I do not understand how to use aggregate,by, or the
appropriate apply to perform a function on data with more than one
factor on unbalanced data...
I have a data frame in the long format that does not contain balanced
data. The ID is a unique identifier corresponding to the experimental
unit that will later be examined by ANOVA, T-tests etc. Y is the data
generated from the experiment.  The factors represent the differences
between each sample or run measured.

str(mydata)  ### sample of table at bottom of email ###
'data.frame':   129982 obs. of  6 variables:
 $ ID: num  7 7 7 7 7 7 8 8 8 8 ...
 $ time : Factor w/ 2 levels "120hr","24hr": 1 1 1 1 2 2 2 1 1 1 ...
 $ treatment: Factor w/ 2 levels "control","trt": 1 1 1 2 2 1 1 2 1 1 ...
 $ expREP   : Factor w/ 3 levels "expREP1","expREP2",..: 1 1 1 3 1 1 1 1 2 2 ...
 $ techREP  : Factor w/ 3 levels "techREP1","techREP2",..: 3 2 1 1 1 3 1 3 3 2 ...
 $ Y : num  14.4 14.1 14.2 13.8 14.1 ...

Could someone please help with doing something like the following
1. I would like to find the median for each unique combination of
factors using the data in the   long format (like finding the median
of a single column of data).
2. Create a new column where the median is repeated for the number of
rows of the unique factor combination
3. I would like to learn the most efficient way to do this because I
want to avoid recreating the table from scratch with many commands
like the series below. I will have to perform this operation on many
different data sets, some with many more factors than this example.

### help me learn to use an apply or other command that will do the
following #
m0 <- mydata$cpdID[mydata$time=="24hr" & mydata$treatment=="control" &
mydata$expREP=="expREP1" & mydata$techREP=="techREP1"]
m1 <- mydata$Y[mydata$time=="24hr" & mydata$treatment=="control" &
mydata$expREP=="expREP1" & mydata$techREP=="techREP1"]
m2 <- median(m1)
m3 <- cbind(ID=m0,time=rep("24hr",length(m1)),
treatment=rep("control",length(m1)), expREP=rep("expREP1",length(m1)),
techREP=rep("techREP1",length(m1)),Y=m1,Y50=rep(m2,length(m1)))
# I would like to avoid writing the above hundreds of times ##

I am able to reshape into wide format and then find the column
medians. However restacking the data and regenerating the factors
becomes very very messy on data sets with 150 columns.  I am able to
perform this analysis in SAS easily using BY, but I would like to know
how to do it in R.


I have tried these commands in a number of different variations with
no luck and similar error messages
 test1 <- aggregate(mydata[,-1],
list(mydata$time,mydata$treatment,mydata$expREP,mydata$techREP)
,median, na.rm=T)
Error in median.default(X[[1]], ...) : need numeric data ### Y is numeric

test1 <- by(mydata[,-1],
list(mydata$time,mydata$treatment,mydata$expREP,mydata$techREP)
,median, na.rm=T)
Error in median.default(data[x, ], ...) : need numeric data

Thanks
Alan
winXP R 2.4.1


#Example data frame##
mydata <- as.data.frame(structure(list(cpdID = c(7, 7, 7, 7, 7, 7, 8, 8,
8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 19, 19, 19, 19, 19, 19, 23, 23, 23, 23,
23, 23, 23, 23, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33, 33, 33, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47), time = structure(as.integer(c(1,
1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2,
1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2,
1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2,
2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2,
2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2,
2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2,
2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1,
1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2,
2, 2, 1, 2, 2, 1, 2, 2, 1, 2)), .Label = c("120hr", "24hr"), class = "factor"),
treatment = structure(as.integer(c(1, 1, 1, 2, 2, 1, 1, 2,
1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1,
2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2,
1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1,
1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2,
2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2,
1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2,
2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1,
1, 2, 2, 1, 2, 2, 2, 2, 2, 2,

Re: [R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

2007-03-07 Thread jim holtman

Here is one way of doing it:

> # create the rows for each unique combination
> x.split <- split(seq(nrow(mydata)), list(mydata$time, mydata$treatment,
+ mydata$expREP, mydata$techREP), drop=TRUE)
> # now go through the list of indices and add the median
> mydata$Y50 <- 0   # add the dummy median column
> for (i in x.split){
+ mydata$Y50[i] <- median(mydata$Y[i])  # median for each group
+ }
> head(mydata,20)
   cpdID  time treatment  expREP  techREP        Y      Y50
1  7 120hr   control expREP1 techREP3 14.42331 15.74599
2  7 120hr   control expREP1 techREP2 14.05282 15.10810
3  7 120hr   control expREP1 techREP1 14.22814 14.63248
4  7 120hr   trt expREP3 techREP1 13.84921 15.08641
5  7  24hr   trt expREP1 techREP1 14.07648 15.17235
6  7  24hr   control expREP1 techREP3 14.21394 14.63314
7  8  24hr   control expREP1 techREP1 14.33992 14.81328
8  8 120hr   trt expREP1 techREP3 14.36256 15.34493
9  8 120hr   control expREP2 techREP3 14.01419 15.14270
10 8 120hr   control expREP2 techREP2 14.09803 15.10079
11 8 120hr   trt expREP1 techREP2 14.31522 15.39152
12 8 120hr   trt expREP1 techREP1 14.45288 14.65430
13 8  24hr   control expREP2 techREP1 13.95919 14.71188
14 8  24hr   trt expREP2 techREP3 14.40640 14.36332
15 8  24hr   trt expREP1 techREP2 14.04038 14.42856
16 8  24hr   control expREP3 techREP3 14.22859 15.08463
17 8  24hr   trt expREP1 techREP3 14.12598 14.53840
18 8  24hr   trt expREP3 techREP1 13.59257 14.69984
19 8  24hr   trt expREP3 techREP2 13.58308 14.85730
20   10 120hr   control expREP3 techREP1 13.02808 14.07448




On 3/7/07, ALAN SMITH [EMAIL PROTECTED] wrote:

 Hello R users,

 Problem...I do not understand how to use aggregate,by, or the
 appropriate apply to perform a function on data with more than one
 factor on unbalanced data...
 I have a data frame in the long format that does not contain balanced
 data. The ID is a unique identifier corresponding to the experimental
 unit that will later be examined by ANOVA, T-tests etc. Y is the data
 generated from the experiment.  The factors represent the differences
 between each sample or run measured.

 str(mydata)  ### sample of table at bottom of email ###
 'data.frame':   129982 obs. of  6 variables:
 $ ID: num  7 7 7 7 7 7 8 8 8 8 ...
 $ time : Factor w/ 2 levels 120hr,24hr: 1 1 1 1 2 2 2 1 1 1 ...
 $ treatment: Factor w/ 2 levels control,trt: 1 1 1 2 2 1 1 2 1 1 ...
 $ expREP   : Factor w/ 3 levels expREP1,expREP2,..: 1 1 1 3 1 1 1 1 2
 2 ...
 $ techREP  : Factor w/ 3 levels techREP1,techREP2,..: 3 2 1 1 1 3
 1 3 3 2 ...
 $ Y : num  14.4 14.1 14.2 13.8 14.1 ...

 Could someone please help with doing something like the following
 1. I would like to find the median for each unique combination of
 factors using the data in the   long format (like finding the median
 of a single column of data).
 2. Create a new column where the median is repeated for the number of
 rows of the unique factor combination
 3. I would like to learn the most efficient way to do this because I
 want to avoid recreating the table from scratch with many commands
 like the series below. I will have to perform this operation on many
 different data sets some, with many more factors then this example.

 ### help me learn to use an apply or other command that will do the
 following #
 m0-mydata$cpdID[mydata$time==24hr  mydata$treatment==control 
 mydata$expREP==expREP1  mydata$techREP==techREP1]
 m1-mydata$Y[mydata$time==24hr  mydata$treatment==control 
 mydata$expREP==expREP1  mydata$techREP==techREP1]
 m2-median(m1)
 m3-cbind(ID=m0,time=rep(24hr,length(m1)),
 treatment=rep(control,length(m1)), expREP=rep(expREP1,length(m1)),
 techREP=rep(techREP1,length(m1)),Y=m1,Y50=rep(m2,length(m1)))
 # I would like to avoid writing the above hundreds of times ##

 I am able to reshape into wide format and then find the column
 medians. However restacking the data and regenerating the factors
 becomes very very messy on data sets with 150 columns.  I am able to
 preform this analysis is SAS easily using BY, but I would like to know
 how to do it in R.


 I have tried these commands in a number of different variations with
 no luck and similar error messages
 test1-aggregate(mydata[,-1],
 list(mydata$time,mydata$treatment,mydata$expREP,mydata$techREP)
 ,median, na.rm=T)
 Error in median.default(X[[1]], ...) : need numeric data ### Y in
 numeric

 test1-by(mydata[,-1],
 list(mydata$time,mydata$treatment,mydata$expREP,mydata$techREP)
 ,median, na.rm=T)
 Error in median.default(data[x, ], ...) : need numeric data

 Thanks
 Alan
 winXP R 2.4.1


 #Example data frame##
 mydata-as.data.frame(structure(list(cpdID = c(7, 7, 7, 7, 7, 7, 8, 8,
 8, 8, 8, 8,
 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
 10, 10, 10, 10, 10, 10, 19, 19, 19,

Re: [R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

2007-03-07 Thread Petr Pikal

Hi

you can use aggregate to create table of medians

with(mydata, aggregate(Y, list(time, treatment, expREP), median))

repeats of unique factors
either by rle or aggregate with length function

Then you can do replication by

norep <- rep(your.median, each = your.replicates)

Regards
Petr

On 7 Mar 2007 at 17:25, ALAN SMITH wrote:

Date sent:  Wed, 7 Mar 2007 17:25:54 -0600
From:   ALAN SMITH [EMAIL PROTECTED]
To: r-help@stat.math.ethz.ch
Subject:[R] how to apply functions to unbalanced data in long 
format by
factors..cant get by or aggregate to work

 Hello R users,
 
 Problem...I do not understand how to use aggregate,by, or the
 appropriate apply to perform a function on data with more than one
 factor on unbalanced data... I have a data frame in the long format
 that does not contain balanced data. The ID is a unique identifier
 corresponding to the experimental unit that will later be examined by
 ANOVA, T-tests etc. Y is the data generated from the experiment.  The
 factors represent the differences between each sample or run
 measured.
 
 str(mydata)  ### sample of table at bottom of email ###
 'data.frame':   129982 obs. of  6 variables:
  $ ID: num  7 7 7 7 7 7 8 8 8 8 ...
  $ time : Factor w/ 2 levels 120hr,24hr: 1 1 1 1 2 2 2 1 1 1
  ... $ treatment: Factor w/ 2 levels control,trt: 1 1 1 2 2 1 1 2
  1 1 ... $ expREP   : Factor w/ 3 levels expREP1,expREP2,..: 1 1 1
  3 1 1 1 1 2 2 ... $ techREP  : Factor w/ 3 levels
  techREP1,techREP2,..: 3 2 1 1 1 3
 1 3 3 2 ...
  $ Y : num  14.4 14.1 14.2 13.8 14.1 ...
 
 Could someone please help with doing something like the following 1. I
 would like to find the median for each unique combination of factors
 using the data in the   long format (like finding the median of a
 single column of data). 2. Create a new column where the median is
 repeated for the number of rows of the unique factor combination 3. I
 would like to learn the most efficient way to do this because I want
 to avoid recreating the table from scratch with many commands like the
 series below. I will have to perform this operation on many different
 data sets some, with many more factors then this example.
 
 ### help me learn to use an apply or other command that will do the
 following #
 m0-mydata$cpdID[mydata$time==24hr  mydata$treatment==control 
 mydata$expREP==expREP1  mydata$techREP==techREP1]
 m1-mydata$Y[mydata$time==24hr  mydata$treatment==control 
 mydata$expREP==expREP1  mydata$techREP==techREP1] m2-median(m1)
 m3-cbind(ID=m0,time=rep(24hr,length(m1)),
 treatment=rep(control,length(m1)), expREP=rep(expREP1,length(m1)),
 techREP=rep(techREP1,length(m1)),Y=m1,Y50=rep(m2,length(m1)))
 # I would like to avoid writing the above hundreds of times
 ##
 
 I am able to reshape into wide format and then find the column
 medians. However restacking the data and regenerating the factors
 becomes very very messy on data sets with 150 columns.  I am able to
 preform this analysis is SAS easily using BY, but I would like to know
 how to do it in R.
 
 
 I have tried these commands in a number of different variations with
 no luck and similar error messages
  test1-aggregate(mydata[,-1],
 list(mydata$time,mydata$treatment,mydata$expREP,mydata$techREP)
 ,median, na.rm=T)
 Error in median.default(X[[1]], ...) : need numeric data ### Y in
 numeric
 
 test1-by(mydata[,-1],
 list(mydata$time,mydata$treatment,mydata$expREP,mydata$techREP)
 ,median, na.rm=T)
 Error in median.default(data[x, ], ...) : need numeric data
 
 Thanks
 Alan
 winXP R 2.4.1
 
 
 #Example data frame##
 mydata-as.data.frame(structure(list(cpdID = c(7, 7, 7, 7, 7, 7, 8, 8,
 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10,
 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
 10, 10, 10, 10, 10, 10, 19, 19, 19, 19, 19, 19, 23, 23, 23, 23, 23,
 23, 23, 23, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
 33, 33, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
 40, 40, 40, 40, 40, 40, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 47, 47,
 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
 47, 47), time = structure(as.integer(c(1, 1, 1, 1, 2, 2, 2, 1, 1, 1,
 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2,
 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2,
 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1,
 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1,
 1, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2,
 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,

Re: [R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

Re: [R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

[R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

Re: [R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

Re: [R] how to apply functions to unbalanced data in long format by factors......cant get by or aggregate to work

5 matches

Site Navigation

Mail list logo

Footer information