Hi r-sig-geo,


I am using the following script to extract weather information for each
point (from a rasterized shapefile for the CONUS), which generates about
460,000 files.

Originally I read the coordinates from a shapefile:

library(rgdal)   # readOGR()
library(raster)  # brick(), extract()
library(gdata)   # write.fwf()

pl <- readOGR(".", "US_2_points")



Then I used the following code to extract the information from three NetCDF
files that contain the tmin, tmax and ppt for each year:

for (year in 1981:2012)
{
  # open the combined NetCDF files for this year as raster bricks
  tminfile <- paste("tmin", "_", year, ".nc", sep = '')
  b_tmin   <- brick(tminfile, varname = 'tmin')
  pptfile  <- paste("ppt", "_", year, ".nc", sep = '')
  b_ppt    <- brick(pptfile, varname = 'ppt')
  tmaxfile <- paste("tmax", "_", year, ".nc", sep = '')
  b_tmax   <- brick(tmaxfile, varname = 'tmax')

  print(paste("processing year: ", year, sep = ''))

  for (l in 1:length(pl))
  {
    # file name convention: N(latitude)W(longitude).wth;
    # 5 digits after the decimal point should be enough
    filename <- paste("e:/PRISM/US3/N",
                      round(coordinates(pl[l, ])[2], 5), "W",
                      abs(round(coordinates(pl[l, ])[1], 5)),
                      ".wth", sep = '')

    print(paste("processing file: ", filename, sep = ''))

    # daily series for this point, one value per layer of each brick
    tmin <- as.numeric(round(extract(b_tmin, coordinates(pl[l, ])), digits = 1))
    tmax <- as.numeric(round(extract(b_tmax, coordinates(pl[l, ])), digits = 1))
    ppt  <- as.numeric(round(extract(b_ppt,  coordinates(pl[l, ])), digits = 2))

    # build the data frame so that column order matches the names
    v <- data.frame(tmin = tmin, tmax = tmax, ppt = ppt)
    v["default"] <- 0
    v["year"]    <- year

    # calendar columns for every day of the year
    date <- seq(as.Date(paste(year, "/1/1", sep = '')),
                as.Date(paste(year, "/12/31", sep = '')), "days")
    v["month"] <- as.numeric(substr(date, 6, 7))
    v["day"]   <- as.numeric(substr(date, 9, 10))
    v <- v[c("year", "month", "day", "default", "tmin", "tmax", "ppt")]

    # write into a file with the fixed-width format,
    # appending if the file already exists
    if (file.exists(filename))
    {
      write.fwf(x = v, filename, append = TRUE, na = "NA",
                rownames = FALSE, colnames = FALSE,
                width = c(6, 3, 3, 5, 5, 5, 6))
    }
    else
    {
      write.fwf(x = v, filename, append = FALSE, na = "NA",
                rownames = FALSE, colnames = FALSE,
                width = c(6, 3, 3, 5, 5, 5, 6))
    }
  }
}
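One variation I have been considering, but have not tested, is to call
extract() once per brick for all the points, since extract() also accepts a
matrix of coordinates and returns a matrix with one row per point and one
column per layer; that way each brick would only be read once per year. A
rough sketch (xy, tmin_all, tmax_all and ppt_all are my own placeholder
names):

# untested sketch: read each brick once for all points, instead of
# once per point; extract() on a matrix of coordinates returns a
# matrix with one row per point and one column per layer (day)
xy <- coordinates(pl)
tmin_all <- round(extract(b_tmin, xy), digits = 1)
tmax_all <- round(extract(b_tmax, xy), digits = 1)
ppt_all  <- round(extract(b_ppt,  xy), digits = 2)

# row l then holds the daily series for point l, e.g.
# tmin <- as.numeric(tmin_all[l, ])

I have not verified that this is actually faster on these files, though.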



As written above, I found the script would take weeks to finish, so I
divided the points from the shapefile by state (generating a separate point
shapefile for each state) and used the following function:



weather4Point <- function(state)
{
  # get the point file for the state
  setwd("c:/PRISM/US1Points/")
  pl <- readOGR(".", paste("points4_", state, sep = ''))
  setwd("e:/PRISM/NetCDF/")

  for (year in 1981:2012)
  {
    # open the combined NetCDF files for this year as raster bricks
    tminfile <- paste("tmin", "_", year, ".nc", sep = '')
    b_tmin   <- brick(tminfile, varname = 'tmin')
    pptfile  <- paste("ppt", "_", year, ".nc", sep = '')
    b_ppt    <- brick(pptfile, varname = 'ppt')
    tmaxfile <- paste("tmax", "_", year, ".nc", sep = '')
    b_tmax   <- brick(tmaxfile, varname = 'tmax')

    print(paste("processing year: ", year, sep = ''))

    for (l in 1:length(pl))
    {
      # file name convention: N(latitude)W(longitude).wth;
      # 5 digits after the decimal point should be enough
      filename <- paste("e:/PRISM/US3/N",
                        round(coordinates(pl[l, ])[2], 5), "W",
                        abs(round(coordinates(pl[l, ])[1], 5)),
                        ".wth", sep = '')

      print(paste("processing file: ", filename, sep = ''))

      # daily series for this point, one value per layer of each brick
      tmin <- as.numeric(round(extract(b_tmin, coordinates(pl[l, ])), digits = 1))
      tmax <- as.numeric(round(extract(b_tmax, coordinates(pl[l, ])), digits = 1))
      ppt  <- as.numeric(round(extract(b_ppt,  coordinates(pl[l, ])), digits = 2))

      # build the data frame so that column order matches the names
      v <- data.frame(tmin = tmin, tmax = tmax, ppt = ppt)
      v["default"] <- 0
      v["year"]    <- year

      # calendar columns for every day of the year
      date <- seq(as.Date(paste(year, "/1/1", sep = '')),
                  as.Date(paste(year, "/12/31", sep = '')), "days")
      v["month"] <- as.numeric(substr(date, 6, 7))
      v["day"]   <- as.numeric(substr(date, 9, 10))
      v <- v[c("year", "month", "day", "default", "tmin", "tmax", "ppt")]

      # write into a file with the APEX fixed-width format,
      # appending if the file already exists
      if (file.exists(filename))
      {
        write.fwf(x = v, filename, append = TRUE, na = "NA",
                  rownames = FALSE, colnames = FALSE,
                  width = c(6, 3, 3, 5, 5, 5, 6))
      }
      else
      {
        write.fwf(x = v, filename, append = FALSE, na = "NA",
                  rownames = FALSE, colnames = FALSE,
                  width = c(6, 3, 3, 5, 5, 5, 6))
      }
    }
  }
}



Then I ran the following, opening several R sessions by hand to make the
process "parallel":



States <- c("AL","AZ","AR","CO","CT","DC","FL","GA","ID","IA",
            "IN","KS","KY","LA","MA","ME","MI","MN","MT","NC")

for (state in States)
{
  weather4Point(state)
}



However, this still feels slow when I plan to do the whole US. Is there a
better solution to speed up this process (any possibility to employ
parallel programming in R)?
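Something along the lines of the following is what I was imagining, if it is
feasible (an untested sketch using the parallel package; the cluster size
and the exported objects are just placeholders):

library(parallel)

cl <- makeCluster(4)  # e.g. one worker per physical core

# each worker process needs the packages and the function loaded
clusterEvalQ(cl, { library(rgdal); library(raster); library(gdata) })
clusterExport(cl, "weather4Point")

# process one state per worker at a time
parLapply(cl, States, weather4Point)

stopCluster(cl)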



Environment information:

R version 3.0.1 (2013-05-16)

Platform: x86_64-w64-mingw32/x64 (64-bit)

Memory: 16 GB

Processor: Intel(R) Core(TM) i7-3770 CPU @ 3.40 GHz



Looking forward to any suggestions.



Regards,



Ping
