Hi,
Q1 solution already sent.
Regarding Q2, one of the files in the new Observed folder doesn't have any
data (just the Year column alone).
That may be the reason for the problem.
### Q1: working directory: Observed #Only one file per Site. Assuming this is
the
### case for the full dataset, then I guess there is no need to average
dir.create("final")
lst1 <- split(list.files(pattern = ".csv"), gsub("\\_.*", "",
list.files(pattern = ".csv")))
lst2 <- lapply(lst1, function(x1) lapply(x1, function(x2) {
lines1 <- readLines(x2)
header1 <- lines1[1:2]
dat1 <- read.table(text = lines1, header = FALSE, sep = ",",
stringsAsFactors = FALSE,
skip = 2)
colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
dat1[-c(nrow(dat1), nrow(dat1) - 1), ]
}))
lst3 <- lst2[sapply(seq_along(lst2),function(i){lstN <-
sapply(lst2[[i]],function(x) is.integer(ncol(x)))})]
#difference in column number
sapply(seq_along(lst3), function(i) {
sapply(lst3[[i]], function(x) ncol(x))
})
#
#[1] 157 258 258 98 157 258 256 258 250 258 258 147 157 250 250 256 249 240
# [19] 181 188 256 146 117 258 153 256 255 246 255 256 258 257 145 258 258 255
# [37] 258 157 164 144 265 258 254 258 258 157 258 176 258 256 257 258 258 258
# [55] 248 258 156 258 157 157 258 258 258 258 258 148 258 258 258 258 257 258
# [73] 258 258 157 154 153 258 248 255 257 256 258 258 157 256 256 257 257 250
# [91] 257 139 155 256 256 257 257 256 258 258 257 258 258 258 258 157 157 157
#[109] 258 258 258 258 256 258 157 258 258 256 258
library(plyr)
library(stringr)
lst4 <- setNames(lapply(seq_along(lst3), function(i) {
lapply(lst3[[i]], function(x) {
names(x)[-1] <- paste(names(x)[-1], names(lst1)[i], sep = "_")
names(x) <- str_trim(names(x))
x
})[[1]]
}), names(lst3))
df1 <- join_all(lst4, by = "Year")
dim(df1)
# [1] 9 27311
sapply(split(names(df1)[-1], gsub(".*\\_", "", names(df1)[-1])), function(x) {
df2 <- df1[, x]
df3 <- data.frame(Percentiles = paste0(seq(0, 100, by = 1), "%"),
numcolwise(function(y) quantile(y,
seq(0, 1, by = 0.01), na.rm = TRUE))(df2), stringsAsFactors = FALSE)
ncol(df3)
})
#
#G100 G101 G102 G103 G104 G105 G106 G107 G108 G109 G110 G111 G112 G113 G114
G115
# 157 258 258 98 157 258 256 258 250 258 258 147 157 250 250
256
#G116 G117 G118 G119 G120 GG10 GG11 GG12 GG13 GG14 GG15 GG16 GG17 GG18 GG19
GG20
# 249 240 181 188 256 146 117 258 153 256 255 246 255 256 258
257
#GG21 GG22 GG23 GG24 GG25 GG26 GG27 GG28 GG29 GG30 GG31 GG32 GG33 GG34 GG35
GG36
# 145 258 258 255 258 157 164 144 265 258 254 258 258 157 258
176
#GG37 GG38 GG39 GG40 GG41 GG42 GG43 GG44 GG45 GG46 GG47 GG48 GG49 GG50 GG51
GG52
# 258 256 257 258 258 258 248 258 156 258 157 157 258 258 258
258
#GG53 GG54 GG55 GG56 GG57 GG58 GG59 GG60 GG61 GG62 GG63 GG64 GG65 GG66 GG67
GG68
# 258 148 258 258 258 258 257 258 258 258 157 154 153 258 248
255
#GG69 GG70 GG71 GG72 GG73 GG74 GG75 GG76 GG77 GG78 GG79 GG80 GG81 GG82 GG83
GG84
# 257 256 258 258 157 256 256 257 257 250 257 139 155 256 256
257
#GG85 GG86 GG87 GG88 GG89 GG90 GG91 GG92 GG93 GG94 GG95 GG96 GG97 GG98 GG99
GGG1
# 257 256 258 258 257 258 258 258 258 157 157 157 258 258 258
258
#GGG2 GGG3 GGG4 GGG5 GGG6 GGG7 GGG8
# 256 258 157 258 258 256 258
lst5 <- split(names(df1)[-1], gsub(".*\\_", "", names(df1)[-1]))
lapply(seq_along(lst5), function(i) {
df2 <- df1[, lst5[[i]]]
df3 <- data.frame(Percentiles = paste0(seq(0, 100, by = 1), "%"),
numcolwise(function(y) quantile(y,
seq(0, 1, by = 0.01), na.rm = TRUE))(df2), stringsAsFactors = FALSE)
df3[1:3, 1:3]
write.csv(df3, paste0(paste(getwd(), "final", paste(names(lst4)[[i]],
"Quantile",
sep = "_"), sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
ReadOut1 <- lapply(list.files(recursive = TRUE)[grep("Quantile",
list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
sapply(ReadOut1, dim)[,1:3]
# [,1] [,2] [,3]
#[1,] 101 101 101
#[2,] 157 258 258
lapply(ReadOut1, function(x) x[1:2, 1:3])[1:3]
#[[1]]
# Percentiles pav.DJF_G100 pav.MAM_G100
#1 0% 0 0.640500
#2 1% 0 0.664604
#
#[[2]]
# Percentiles txav.DJF_G101 txav.MAM_G101
#1 0% -13.8756 4.742400
#2 1% -13.8140 4.817184
#
#[[3]]
# Percentiles txav.DJF_G102 txav.MAM_G102
#1 0% -15.05000 4.520700
#2 1% -14.96833 4.543828
### Q2: Observed data
dir.create("Indices")
names1 <- unlist(lapply(ReadOut1, function(x) names(x)[-1]))
names2 <- gsub("\\_.*", "", names1)
names3 <- unique(gsub("[.]", " ", names2))
res <- do.call(rbind, lapply(seq_along(lst5), function(i) {
df2 <- df1[, lst5[[i]]]
vec1 <- colMeans(df2, na.rm = TRUE)
vec2 <- rep(NA, length(names3))
names(vec2) <- paste(names3, names(lst5)[[i]], sep = "_")
vec2[names(vec2) %in% names(vec1)] <- vec1
names(vec2) <- gsub("\\_.*", "", names(vec2))
vec2
}))
dim(res)
#[1] 119 264
lapply(seq_len(ncol(res)), function(i) {
mat1 <- t(res[, i, drop = FALSE])
colnames(mat1) <- names(lst4)
write.csv(mat1, paste0(paste(getwd(), "Indices", gsub(" ", "_",
rownames(mat1)),
sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
## Output2:
ReadOut2 <- lapply(list.files(recursive = TRUE)[grep("Indices",
list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
length(ReadOut2)
# [1] 264
list.files(recursive = TRUE)[grep("Indices", list.files(recursive = TRUE))][1]
# [1] 'Indices/pav_ANN.csv'
res[, "pav ANN", drop = FALSE]
ReadOut2[[1]]
Attached is the updated Quantilecode2.txt.
A.K.
On Monday, April 14, 2014 10:41 PM, Zilefac Elvis <zilefacel...@yahoo.com>
wrote:
Hi AK,
Q1) Please try to correct the error using the larger data set (Sample.zip). The
issue is that once you write the codes and restrict it to smaller data sets, I
find it difficult to generalize it to larger data sets.
Q2) From the Quantilecode2.txt you just sent, you forgot to do the following
section using the Observed.zip file. I tried to run the code to section Q1 in
Quantilecode2.txt using a larger data set and received the same error :Error in
2:nrow(lstNew) : argument of length 0. I have attached a larger data set too
for you to generalize the code to suit the larger data set. Please do not
forget to include the code below in the final code of Q2.
Once you fix these two, I should be able to fix the rest following these
examples.
Thanks AK. Sorry for overloading you with much work.
Atem.
#==============================================================================================================
dir.create("Indices")
names1 <- lapply(ReadOut1, function(x) names(x))[[1]]
lstNew <- simplify2array(ReadOut1) lapply(2:nrow(lstNew), function(i) { dat1 <-
data.frame(lstNew[1], do.call(cbind, lstNew[i, ]), stringsAsFactors = FALSE)
colnames(dat1) <- c(rownames(lstNew)[1], paste(names(lst1),
rep(rownames(lstNew)[i], length(lst1)), sep = "_"))
write.csv(dat1, paste0(paste(getwd(), "Indices", rownames(lstNew)[i], sep =
"/"), ".csv"), row.names = FALSE, quote = FALSE)
})
## Output2:
ReadOut2 <- lapply(list.files(recursive = TRUE)[grep("Indices",
list.files(recursive = TRUE))], function(x) read.csv(x, header = TRUE,
stringsAsFactors = FALSE))
length(ReadOut2)
# [1] 257
head(ReadOut2[[1]], 2)
#==============================================================================================================
=================================================
### Q1: working directory: Observed #Only one file per Site. Assuming this is
the
### case for the full dataset, then I guess there is no need to average
dir.create("final")
lst1 <- split(list.files(pattern = ".csv"), gsub("\\_.*", "",
list.files(pattern = ".csv")))
lst2 <- lapply(lst1, function(x1) lapply(x1, function(x2) {
lines1 <- readLines(x2)
header1 <- lines1[1:2]
dat1 <- read.table(text = lines1, header = FALSE, sep = ",",
stringsAsFactors = FALSE,
skip = 2)
colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
dat1[-c(nrow(dat1), nrow(dat1) - 1), ]
}))
lst3 <- lst2[sapply(seq_along(lst2),function(i){lstN <-
sapply(lst2[[i]],function(x) is.integer(ncol(x)))})]
#difference in column number
sapply(seq_along(lst3), function(i) {
sapply(lst3[[i]], function(x) ncol(x))
})
#
#[1] 157 258 258 98 157 258 256 258 250 258 258 147 157 250 250 256 249 240
# [19] 181 188 256 146 117 258 153 256 255 246 255 256 258 257 145 258 258 255
# [37] 258 157 164 144 265 258 254 258 258 157 258 176 258 256 257 258 258 258
# [55] 248 258 156 258 157 157 258 258 258 258 258 148 258 258 258 258 257 258
# [73] 258 258 157 154 153 258 248 255 257 256 258 258 157 256 256 257 257 250
# [91] 257 139 155 256 256 257 257 256 258 258 257 258 258 258 258 157 157 157
#[109] 258 258 258 258 256 258 157 258 258 256 258
library(plyr)
library(stringr)
lst4 <- setNames(lapply(seq_along(lst3), function(i) {
lapply(lst3[[i]], function(x) {
names(x)[-1] <- paste(names(x)[-1], names(lst1)[i], sep = "_")
names(x) <- str_trim(names(x))
x
})[[1]]
}), names(lst3))
df1 <- join_all(lst4, by = "Year")
dim(df1)
# [1] 9 27311
sapply(split(names(df1)[-1], gsub(".*\\_", "", names(df1)[-1])), function(x) {
df2 <- df1[, x]
df3 <- data.frame(Percentiles = paste0(seq(0, 100, by = 1), "%"),
numcolwise(function(y) quantile(y,
seq(0, 1, by = 0.01), na.rm = TRUE))(df2), stringsAsFactors = FALSE)
ncol(df3)
})
#
#G100 G101 G102 G103 G104 G105 G106 G107 G108 G109 G110 G111 G112 G113 G114
G115
# 157 258 258 98 157 258 256 258 250 258 258 147 157 250 250
256
#G116 G117 G118 G119 G120 GG10 GG11 GG12 GG13 GG14 GG15 GG16 GG17 GG18 GG19
GG20
# 249 240 181 188 256 146 117 258 153 256 255 246 255 256 258
257
#GG21 GG22 GG23 GG24 GG25 GG26 GG27 GG28 GG29 GG30 GG31 GG32 GG33 GG34 GG35
GG36
# 145 258 258 255 258 157 164 144 265 258 254 258 258 157 258
176
#GG37 GG38 GG39 GG40 GG41 GG42 GG43 GG44 GG45 GG46 GG47 GG48 GG49 GG50 GG51
GG52
# 258 256 257 258 258 258 248 258 156 258 157 157 258 258 258
258
#GG53 GG54 GG55 GG56 GG57 GG58 GG59 GG60 GG61 GG62 GG63 GG64 GG65 GG66 GG67
GG68
# 258 148 258 258 258 258 257 258 258 258 157 154 153 258 248
255
#GG69 GG70 GG71 GG72 GG73 GG74 GG75 GG76 GG77 GG78 GG79 GG80 GG81 GG82 GG83
GG84
# 257 256 258 258 157 256 256 257 257 250 257 139 155 256 256
257
#GG85 GG86 GG87 GG88 GG89 GG90 GG91 GG92 GG93 GG94 GG95 GG96 GG97 GG98 GG99
GGG1
# 257 256 258 258 257 258 258 258 258 157 157 157 258 258 258
258
#GGG2 GGG3 GGG4 GGG5 GGG6 GGG7 GGG8
# 256 258 157 258 258 256 258
lst5 <- split(names(df1)[-1], gsub(".*\\_", "", names(df1)[-1]))
lapply(seq_along(lst5), function(i) {
df2 <- df1[, lst5[[i]]]
df3 <- data.frame(Percentiles = paste0(seq(0, 100, by = 1), "%"),
numcolwise(function(y) quantile(y,
seq(0, 1, by = 0.01), na.rm = TRUE))(df2), stringsAsFactors = FALSE)
df3[1:3, 1:3]
write.csv(df3, paste0(paste(getwd(), "final", paste(names(lst4)[[i]],
"Quantile",
sep = "_"), sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
ReadOut1 <- lapply(list.files(recursive = TRUE)[grep("Quantile",
list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
sapply(ReadOut1, dim)[,1:3]
# [,1] [,2] [,3]
#[1,] 101 101 101
#[2,] 157 258 258
lapply(ReadOut1, function(x) x[1:2, 1:3])[1:3]
#[[1]]
# Percentiles pav.DJF_G100 pav.MAM_G100
#1 0% 0 0.640500
#2 1% 0 0.664604
#
#[[2]]
# Percentiles txav.DJF_G101 txav.MAM_G101
#1 0% -13.8756 4.742400
#2 1% -13.8140 4.817184
#
#[[3]]
# Percentiles txav.DJF_G102 txav.MAM_G102
#1 0% -15.05000 4.520700
#2 1% -14.96833 4.543828
### Q2: Observed data
dir.create("Indices")
names1 <- unlist(lapply(ReadOut1, function(x) names(x)[-1]))
names2 <- gsub("\\_.*", "", names1)
names3 <- unique(gsub("[.]", " ", names2))
res <- do.call(rbind, lapply(seq_along(lst5), function(i) {
df2 <- df1[, lst5[[i]]]
vec1 <- colMeans(df2, na.rm = TRUE)
vec2 <- rep(NA, length(names3))
names(vec2) <- paste(names3, names(lst5)[[i]], sep = "_")
vec2[names(vec2) %in% names(vec1)] <- vec1
names(vec2) <- gsub("\\_.*", "", names(vec2))
vec2
}))
dim(res)
#[1] 119 264
lapply(seq_len(ncol(res)), function(i) {
mat1 <- t(res[, i, drop = FALSE])
colnames(mat1) <- names(lst4)
write.csv(mat1, paste0(paste(getwd(), "Indices", gsub(" ", "_",
rownames(mat1)),
sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
## Output2:
ReadOut2 <- lapply(list.files(recursive = TRUE)[grep("Indices",
list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
length(ReadOut2)
# [1] 264
list.files(recursive = TRUE)[grep("Indices", list.files(recursive = TRUE))][1]
# [1] 'Indices/pav_ANN.csv'
res[, "pav ANN", drop = FALSE]
ReadOut2[[1]]
### Sample data Working directory changed to 'sample'
dir.create("Indices_colMeans")
lst1 <- split(list.files(pattern = ".csv"), gsub("\\_.*", "",
list.files(pattern = ".csv")))
lst2 <- lapply(lst1, function(x1) lapply(x1, function(x2) {
lines1 <- readLines(x2)
header1 <- lines1[1:2]
dat1 <- read.table(text = lines1, header = FALSE, sep = ",",
stringsAsFactors = FALSE,
skip = 2)
colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
dat1[-c(nrow(dat1), nrow(dat1) - 1), ]
}))
res1 <- do.call(rbind, lapply(seq_along(lst2), function(i) {
rowMeans(do.call(cbind, lapply(lst2[[i]], function(x) colMeans(x[, -1],
na.rm = TRUE))),
na.rm = TRUE)
}))
lapply(seq_len(ncol(res1)), function(i) {
mat1 <- t(res1[, i, drop = FALSE])
colnames(mat1) <- names(lst2)
write.csv(mat1, paste0(paste(getwd(), "Indices_colMeans", gsub(" ", "_",
rownames(mat1)),
sep = "/"), ".csv"), row.names = FALSE, quote = FALSE)
})
## Output2 Sample
ReadOut2S <- lapply(list.files(recursive = TRUE)[grep("Indices",
list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
length(ReadOut2S)
# [1] 257
list.files(recursive = TRUE)[grep("Indices", list.files(recursive = TRUE))][1]
# [1] 'Indices_colMeans/pav_ANN.csv'
res1[, "pav ANN", drop = FALSE]
# pav ANN [1,] 1.545620 [2,] 1.518553
ReadOut2S[[1]]
# G100 G101 1 1.54562 1.518553
______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.