[R] Function that is giving me a headache- any help appreciated (automatic read )

2010-05-18 Thread stephen sefick
note: whole function is below- I am sure I am doing something silly.

when I use it like USGS(input=precipitation) it is choking on the


precip.1 - subset(DF, precipitation!=NA)
b - ddply(precip.1$precipitation, .(precip.1$gauge_name), cumsum)
DF.precip - precip.1
DF.precip$precipitation - b$.data

part, but runs fine outside of the function:

days=7
input=precipitation
require(chron)
require(gsubfn)
require(ggplot2)
require(plyr)
#021973269 is the Waynesboro Gauge on the Savannah River Proper (SRS)
#02102908 is the Flat Creek Gauge (ftbrfcms)
#02133500 is the Drowning Creek (ftbrbmcm)
#02341800 is the Upatoi Creek Near Columbus (ftbn)
#02342500 is the Uchee Creek Near Fort Mitchell (ftbn)
#02203000 is the Canoochee River Near Claxton (ftst)
#02196690 is the Horse Creek Gauge at Clearwater, S.C.

a - http://waterdata.usgs.gov/nwis/uv?format=rdbperiod=;
b - site_no=021973269,02102908,02133500,02341800,02342500,02203000,02196690
z - paste(a, days, b, sep=)
L - readLines(z)

#look for the data with USGS in front of it (this take advantage of
#the agency column)
L.USGS - grep(^USGS, L, value = TRUE)
DF - read.table(textConnection(L.USGS), fill = TRUE)
colnames(DF) - c(agency, gauge, date, time, time_zone,
gauge_height,
discharge, precipitation)
pat - ^# +USGS +([0-9]+) +(.*)
L.DD - grep(pat, L, value = TRUE)
library(gsubfn)
DD - strapply(L.DD, pat, c, simplify = rbind)
DDdf - data.frame(gauge = as.numeric(DD[,1]), gauge_name = DD[,2])
both - merge(DF, DDdf, by = gauge, all.x = TRUE)

dts - as.character(both[,date])
tms - as.character(both[,time])
date_time - as.chron(paste(dts, tms), %Y-%m-%d %H:%M)
DF - data.frame(Date=as.POSIXct(date_time), both)
#change precip to numeric
DF[,precipitation] - as.numeric(as.character(DF[,precipitation]))

precip.1 - subset(DF, precipitation!=NA)
b - ddply(precip.1$precipitation, .(precip.1$gauge_name), cumsum)
DF.precip - precip.1
DF.precip$precipitation - b$.data

#discharge
if(input==data){

return(DF)

}else{

qplot(Date, discharge, data=DF,
geom=line, ylab=Date)+facet_wrap(~gauge_name,
scales=free_y)+coord_trans(y=log10)}

if(input==precipitation){
#precipitation
qplot(Date, precipitation, data=DF.precip,
geom=line)+facet_wrap(~gauge_name, scales=free_y)

}else{

qplot(Date, discharge, data=DF,
geom=line, ylab=Date)+facet_wrap(~gauge_name,
scales=free_y)+coord_trans(y=log10)}

below is the whole function:

USGS - function(input=discharge, days=7){
require(chron)
require(gsubfn)
require(ggplot2)
require(plyr)
#021973269 is the Waynesboro Gauge on the Savannah River Proper (SRS)
#02102908 is the Flat Creek Gauge (ftbrfcms)
#02133500 is the Drowning Creek (ftbrbmcm)
#02341800 is the Upatoi Creek Near Columbus (ftbn)
#02342500 is the Uchee Creek Near Fort Mitchell (ftbn)
#02203000 is the Canoochee River Near Claxton (ftst)
#02196690 is the Horse Creek Gauge at Clearwater, S.C.

a - http://waterdata.usgs.gov/nwis/uv?format=rdbperiod=;
b - site_no=021973269,02102908,02133500,02341800,02342500,02203000,02196690
z - paste(a, days, b, sep=)
L - readLines(z)

#look for the data with USGS in front of it (this take advantage of
#the agency column)
L.USGS - grep(^USGS, L, value = TRUE)
DF - read.table(textConnection(L.USGS), fill = TRUE)
colnames(DF) - c(agency, gauge, date, time, time_zone,
gauge_height,
discharge, precipitation)
pat - ^# +USGS +([0-9]+) +(.*)
L.DD - grep(pat, L, value = TRUE)
library(gsubfn)
DD - strapply(L.DD, pat, c, simplify = rbind)
DDdf - data.frame(gauge = as.numeric(DD[,1]), gauge_name = DD[,2])
both - merge(DF, DDdf, by = gauge, all.x = TRUE)

dts - as.character(both[,date])
tms - as.character(both[,time])
date_time - as.chron(paste(dts, tms), %Y-%m-%d %H:%M)
DF - data.frame(Date=as.POSIXct(date_time), both)
#change precip to numeric
DF[,precipitation] - as.numeric(as.character(DF[,precipitation]))

precip.1 - subset(DF, precipitation!=NA)
b - ddply(precip.1$precipitation, .(precip.1$gauge_name), cumsum)
DF.precip - precip.1
DF.precip$precipitation - b$.data

#discharge
if(input==data){

return(DF)

}else{

qplot(Date, discharge, data=DF,
geom=line, ylab=Date)+facet_wrap(~gauge_name,
scales=free_y)+coord_trans(y=log10)}

if(input==precipitation){
#precipitation
qplot(Date, precipitation, data=DF.precip,
geom=line)+facet_wrap(~gauge_name, scales=free_y)

}else{

qplot(Date, discharge, data=DF,
geom=line, ylab=Date)+facet_wrap(~gauge_name,
scales=free_y)+coord_trans(y=log10)}

}


-- 
Stephen Sefick

Let's not spend our time and resources thinking about things that are
so little or so large that all they really do for us is puff us up and
make us feel like gods.  We are mammals, and have not exhausted the
annoying little problems of being mammals.

-K. Mullis

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, 

Re: [R] Function that is giving me a headache- any help appreciated (automatic read )

2010-05-18 Thread John Kane

I don't think you can do this
precipitation!=NA)

have a look at ?is.na

--- On Tue, 5/18/10, stephen sefick ssef...@gmail.com wrote:

 From: stephen sefick ssef...@gmail.com
 Subject: [R] Function that is giving me a headache- any help appreciated 
 (automatic read )
 To: r-help@r-project.org
 Received: Tuesday, May 18, 2010, 12:38 PM
 note: whole function is below- I am
 sure I am doing something silly.
 
 when I use it like USGS(input=precipitation) it is
 choking on the
 
 
 precip.1 - subset(DF, precipitation!=NA)
 b - ddply(precip.1$precipitation,
 .(precip.1$gauge_name), cumsum)
 DF.precip - precip.1
 DF.precip$precipitation - b$.data
 
 part, but runs fine outside of the function:
 
 days=7
 input=precipitation
 require(chron)
 require(gsubfn)
 require(ggplot2)
 require(plyr)
 #021973269 is the Waynesboro Gauge on the Savannah River
 Proper (SRS)
 #02102908 is the Flat Creek Gauge (ftbrfcms)
 #02133500 is the Drowning Creek (ftbrbmcm)
 #02341800 is the Upatoi Creek Near Columbus (ftbn)
 #02342500 is the Uchee Creek Near Fort Mitchell (ftbn)
 #02203000 is the Canoochee River Near Claxton (ftst)
 #02196690 is the Horse Creek Gauge at Clearwater, S.C.
 
 a - http://waterdata.usgs.gov/nwis/uv?format=rdbperiod=;
 b -
 site_no=021973269,02102908,02133500,02341800,02342500,02203000,02196690
 z - paste(a, days, b, sep=)
 L - readLines(z)
 
 #look for the data with USGS in front of it (this take
 advantage of
 #the agency column)
 L.USGS - grep(^USGS, L, value = TRUE)
 DF - read.table(textConnection(L.USGS), fill = TRUE)
 colnames(DF) - c(agency, gauge, date, time,
 time_zone,
 gauge_height,
 discharge, precipitation)
 pat - ^# +USGS +([0-9]+) +(.*)
 L.DD - grep(pat, L, value = TRUE)
 library(gsubfn)
 DD - strapply(L.DD, pat, c, simplify = rbind)
 DDdf - data.frame(gauge = as.numeric(DD[,1]),
 gauge_name = DD[,2])
 both - merge(DF, DDdf, by = gauge, all.x = TRUE)
 
 dts - as.character(both[,date])
 tms - as.character(both[,time])
 date_time - as.chron(paste(dts, tms), %Y-%m-%d
 %H:%M)
 DF - data.frame(Date=as.POSIXct(date_time), both)
 #change precip to numeric
 DF[,precipitation] -
 as.numeric(as.character(DF[,precipitation]))
 
 precip.1 - subset(DF, precipitation!=NA)
 b - ddply(precip.1$precipitation,
 .(precip.1$gauge_name), cumsum)
 DF.precip - precip.1
 DF.precip$precipitation - b$.data
 
 #discharge
 if(input==data){
 
 return(DF)
 
 }else{
 
 qplot(Date, discharge, data=DF,
 geom=line, ylab=Date)+facet_wrap(~gauge_name,
 scales=free_y)+coord_trans(y=log10)}
 
 if(input==precipitation){
 #precipitation
 qplot(Date, precipitation, data=DF.precip,
 geom=line)+facet_wrap(~gauge_name, scales=free_y)
 
 }else{
 
 qplot(Date, discharge, data=DF,
 geom=line, ylab=Date)+facet_wrap(~gauge_name,
 scales=free_y)+coord_trans(y=log10)}
 
 below is the whole function:
 
 USGS - function(input=discharge, days=7){
 require(chron)
 require(gsubfn)
 require(ggplot2)
 require(plyr)
 #021973269 is the Waynesboro Gauge on the Savannah River
 Proper (SRS)
 #02102908 is the Flat Creek Gauge (ftbrfcms)
 #02133500 is the Drowning Creek (ftbrbmcm)
 #02341800 is the Upatoi Creek Near Columbus (ftbn)
 #02342500 is the Uchee Creek Near Fort Mitchell (ftbn)
 #02203000 is the Canoochee River Near Claxton (ftst)
 #02196690 is the Horse Creek Gauge at Clearwater, S.C.
 
 a - http://waterdata.usgs.gov/nwis/uv?format=rdbperiod=;
 b -
 site_no=021973269,02102908,02133500,02341800,02342500,02203000,02196690
 z - paste(a, days, b, sep=)
 L - readLines(z)
 
 #look for the data with USGS in front of it (this take
 advantage of
 #the agency column)
 L.USGS - grep(^USGS, L, value = TRUE)
 DF - read.table(textConnection(L.USGS), fill = TRUE)
 colnames(DF) - c(agency, gauge, date, time,
 time_zone,
 gauge_height,
 discharge, precipitation)
 pat - ^# +USGS +([0-9]+) +(.*)
 L.DD - grep(pat, L, value = TRUE)
 library(gsubfn)
 DD - strapply(L.DD, pat, c, simplify = rbind)
 DDdf - data.frame(gauge = as.numeric(DD[,1]),
 gauge_name = DD[,2])
 both - merge(DF, DDdf, by = gauge, all.x = TRUE)
 
 dts - as.character(both[,date])
 tms - as.character(both[,time])
 date_time - as.chron(paste(dts, tms), %Y-%m-%d
 %H:%M)
 DF - data.frame(Date=as.POSIXct(date_time), both)
 #change precip to numeric
 DF[,precipitation] -
 as.numeric(as.character(DF[,precipitation]))
 
 precip.1 - subset(DF, precipitation!=NA)
 b - ddply(precip.1$precipitation,
 .(precip.1$gauge_name), cumsum)
 DF.precip - precip.1
 DF.precip$precipitation - b$.data
 
 #discharge
 if(input==data){
 
 return(DF)
 
 }else{
 
 qplot(Date, discharge, data=DF,
 geom=line, ylab=Date)+facet_wrap(~gauge_name,
 scales=free_y)+coord_trans(y=log10)}
 
 if(input==precipitation){
 #precipitation
 qplot(Date, precipitation, data=DF.precip,
 geom=line)+facet_wrap(~gauge_name, scales=free_y)
 
 }else{
 
 qplot(Date, discharge, data=DF,
 geom=line, ylab=Date)+facet_wrap(~gauge_name,
 scales=free_y)+coord_trans(y=log10)}
 
 }
 
 
 -- 
 Stephen Sefick
 
 Let's not spend our time and resources thinking about

Re: [R] Function that is giving me a headache- any help appreciated (automatic read )

2010-05-18 Thread Peter Ehlers

On 2010-05-18 11:00, John Kane wrote:


I don't think you can do this
precipitation!=NA)


Actually, that will work here, although it should always be avoided.
Do use is.na().

The main problem seems to be that the ddply() call doesn't work.
I would just use tapply() and unlist():

 b - with(precip.1, tapply(precipitation, gauge_name, cumsum))
 b - unlist(b)

You'll also find that you need to wrap your qplot() calls in print().

And your if/else logic (at the end of USGS) looks faulty.

 -Peter Ehlers



have a look at ?is.na

--- On Tue, 5/18/10, stephen sefickssef...@gmail.com  wrote:


From: stephen sefickssef...@gmail.com
Subject: [R] Function that is giving me a headache- any help appreciated 
(automatic read )
To: r-help@r-project.org
Received: Tuesday, May 18, 2010, 12:38 PM
note: whole function is below- I am
sure I am doing something silly.

when I use it like USGS(input=precipitation) it is
choking on the


precip.1- subset(DF, precipitation!=NA)
b- ddply(precip.1$precipitation,
.(precip.1$gauge_name), cumsum)
DF.precip- precip.1
DF.precip$precipitation- b$.data

part, but runs fine outside of the function:

days=7
input=precipitation
require(chron)
require(gsubfn)
require(ggplot2)
require(plyr)
#021973269 is the Waynesboro Gauge on the Savannah River
Proper (SRS)
#02102908 is the Flat Creek Gauge (ftbrfcms)
#02133500 is the Drowning Creek (ftbrbmcm)
#02341800 is the Upatoi Creek Near Columbus (ftbn)
#02342500 is the Uchee Creek Near Fort Mitchell (ftbn)
#02203000 is the Canoochee River Near Claxton (ftst)
#02196690 is the Horse Creek Gauge at Clearwater, S.C.

a- http://waterdata.usgs.gov/nwis/uv?format=rdbperiod=;
b-
site_no=021973269,02102908,02133500,02341800,02342500,02203000,02196690
z- paste(a, days, b, sep=)
L- readLines(z)

#look for the data with USGS in front of it (this take
advantage of
#the agency column)
L.USGS- grep(^USGS, L, value = TRUE)
DF- read.table(textConnection(L.USGS), fill = TRUE)
colnames(DF)- c(agency, gauge, date, time,
time_zone,
gauge_height,
discharge, precipitation)
pat- ^# +USGS +([0-9]+) +(.*)
L.DD- grep(pat, L, value = TRUE)
library(gsubfn)
DD- strapply(L.DD, pat, c, simplify = rbind)
DDdf- data.frame(gauge = as.numeric(DD[,1]),
gauge_name = DD[,2])
both- merge(DF, DDdf, by = gauge, all.x = TRUE)

dts- as.character(both[,date])
tms- as.character(both[,time])
date_time- as.chron(paste(dts, tms), %Y-%m-%d
%H:%M)
DF- data.frame(Date=as.POSIXct(date_time), both)
#change precip to numeric
DF[,precipitation]-
as.numeric(as.character(DF[,precipitation]))

precip.1- subset(DF, precipitation!=NA)
b- ddply(precip.1$precipitation,
.(precip.1$gauge_name), cumsum)
DF.precip- precip.1
DF.precip$precipitation- b$.data

#discharge
if(input==data){

return(DF)

}else{

qplot(Date, discharge, data=DF,
geom=line, ylab=Date)+facet_wrap(~gauge_name,
scales=free_y)+coord_trans(y=log10)}

if(input==precipitation){
#precipitation
qplot(Date, precipitation, data=DF.precip,
geom=line)+facet_wrap(~gauge_name, scales=free_y)

}else{

qplot(Date, discharge, data=DF,
geom=line, ylab=Date)+facet_wrap(~gauge_name,
scales=free_y)+coord_trans(y=log10)}

below is the whole function:

USGS- function(input=discharge, days=7){
require(chron)
require(gsubfn)
require(ggplot2)
require(plyr)
#021973269 is the Waynesboro Gauge on the Savannah River
Proper (SRS)
#02102908 is the Flat Creek Gauge (ftbrfcms)
#02133500 is the Drowning Creek (ftbrbmcm)
#02341800 is the Upatoi Creek Near Columbus (ftbn)
#02342500 is the Uchee Creek Near Fort Mitchell (ftbn)
#02203000 is the Canoochee River Near Claxton (ftst)
#02196690 is the Horse Creek Gauge at Clearwater, S.C.

a- http://waterdata.usgs.gov/nwis/uv?format=rdbperiod=;
b-
site_no=021973269,02102908,02133500,02341800,02342500,02203000,02196690
z- paste(a, days, b, sep=)
L- readLines(z)

#look for the data with USGS in front of it (this take
advantage of
#the agency column)
L.USGS- grep(^USGS, L, value = TRUE)
DF- read.table(textConnection(L.USGS), fill = TRUE)
colnames(DF)- c(agency, gauge, date, time,
time_zone,
gauge_height,
discharge, precipitation)
pat- ^# +USGS +([0-9]+) +(.*)
L.DD- grep(pat, L, value = TRUE)
library(gsubfn)
DD- strapply(L.DD, pat, c, simplify = rbind)
DDdf- data.frame(gauge = as.numeric(DD[,1]),
gauge_name = DD[,2])
both- merge(DF, DDdf, by = gauge, all.x = TRUE)

dts- as.character(both[,date])
tms- as.character(both[,time])
date_time- as.chron(paste(dts, tms), %Y-%m-%d
%H:%M)
DF- data.frame(Date=as.POSIXct(date_time), both)
#change precip to numeric
DF[,precipitation]-
as.numeric(as.character(DF[,precipitation]))

precip.1- subset(DF, precipitation!=NA)
b- ddply(precip.1$precipitation,
.(precip.1$gauge_name), cumsum)
DF.precip- precip.1
DF.precip$precipitation- b$.data

#discharge
if(input==data){

return(DF)

}else{

qplot(Date, discharge, data=DF,
geom=line, ylab=Date)+facet_wrap(~gauge_name,
scales=free_y)+coord_trans(y=log10)}

if(input==precipitation){
#precipitation
qplot(Date, precipitation, data=DF.precip,
geom=line

Re: [R] Function that is giving me a headache- any help appreciated (automatic read )

2010-05-18 Thread Hadley Wickham
 precip.1 - subset(DF, precipitation!=NA)
 b - ddply(precip.1$precipitation, .(precip.1$gauge_name), cumsum)
 DF.precip - precip.1
 DF.precip$precipitation - b$.data

I suspect what you want here is

ddply(precip.1, gauge_name, transform, precipitation = cumsum(precipitation))

Hadley


-- 
Assistant Professor / Dobelman Family Junior Chair
Department of Statistics / Rice University
http://had.co.nz/

__
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.