Este codigo resuelve mi problema. Estoy usando `str_split` y como separador
'- '. Tambien tengo que usar 'trimws'. Supongo que se podria limpiar el
codigo para hacerlo mas eficiente, pero todavia no se me ocurrio como. *Muchas
gracias Carlos!*
library(dplyr)
library(randomNames)
library(geosphere)
set.seed(7142015)
# Define Parameters
n.Schools - 20
first.grade-3
last.grade-5
n.Grades -last.grade-first.grade+1
n.Classrooms - 4
n.Teachers - (n.Schools*n.Grades*n.Classrooms)/2 #Two classrooms per
teacher
# Define Random names function:
gen.names - function(n, which.names = both, name.order = last.first){
names - unique(randomNames(n=n, which.names = which.names, name.order =
name.order))
need - n - length(names)
while(need0){
names - unique(c(randomNames(n=need, which.names = which.names,
name.order = name.order), names))
need - n - length(names)
}
return(names)
}
# Generate n.Schools names
gen.schools - function(n.schools) {
School.ID -
paste0(gen.names(n = n.schools, which.names = last), ' School')
School.long - rnorm(n = n.schools, mean = 21.7672, sd = 0.025)
School.lat - rnorm(n = n.schools, mean = 58.8471, sd = 0.025)
School.RE - rnorm(n = n.schools, mean = 0, sd = 1)
Schools -
data.frame(School.ID, School.lat, School.long, School.RE) %%
mutate(School.ID = as.character(School.ID)) %%
rowwise() %% mutate (School.distance = distHaversine(
p1 = c(School.long, School.lat),
p2 = c(21.7672, 58.8471), r = 3961
))
return(Schools)
}
Schools - gen.schools(n.schools = n.Schools)
# Generate Grades
Grades - c(first.grade:last.grade)
# Generate n.Classrooms
Classrooms - LETTERS[1:n.Classrooms]
# Group schools and grades
SchGr - outer(paste0(Schools$School.ID, '-'), paste0(Grades, '-'),
FUN=paste)
#head(SchGr)
# Group SchGr and Classrooms
SchGrClss - outer(SchGr, paste0(Classrooms, '-'), FUN=paste)
#head(SchGrClss)
# These are the combination of School-Grades-Classroom
SchGrClssTmp - as.matrix(SchGrClss, ncol=1, nrow=length(SchGrClss) )
SchGrClssEnd - as.data.frame(SchGrClssTmp)
# Assign n.Teachers (2 classroom in a given school-grade)
Allpairs - as.data.frame(t(combn(SchGrClssTmp, 2)))
AllpairsTmp - paste(Allpairs$V1, Allpairs$V2, sep= )
library(stringr)
separoPairs - as.data.frame(str_split(string = AllpairsTmp, pattern = -))
separoPairs - as.data.frame(t(separoPairs))
row.names(separoPairs) - NULL
separoPairs - separoPairs %% select(-V7) %% #Drops empty column
mutate(V1=as.character(V1), V4=as.character(V4), V2=as.numeric(V2),
V5=as.numeric(V5)) %% mutate(V4 = trimws(V4, which = both))
separoPairs[120,]$V4
#Only the rows with V1=V4 and V2=V5 are valid
validPairs - separoPairs %% filter(V1==V4 V2==V5) %% select(V1, V2,
V3, V6)
# Generate n.Teachers
gen.teachers - function(n.teachers){
Teacher.ID - gen.names(n = n.teachers, name.order = last.first)
Teacher.exp - runif(n = n.teachers, min = 1, max = 30)
Teacher.Other - sample(c(0,1), replace = T, prob = c(0.5, 0.5), size =
n.teachers)
Teacher.RE - rnorm(n = n.teachers, mean = 0, sd = 1)
Teachers - data.frame(Teacher.ID, Teacher.exp, Teacher.Other, Teacher.RE)
return(Teachers)
}
Teachers - gen.teachers(n.teachers = n.Teachers) %%
mutate(Teacher.ID = as.character(Teacher.ID))
# Randomly assign n.Teachers teachers to the ValidPairs
TmpAssignments - validPairs[sample(1:nrow(validPairs), n.Teachers), ]
Assignments - cbind.data.frame(Teachers$Teacher.ID, TmpAssignments)
names(Assignments) - c(Teacher.ID, School.ID, Grade, Class_1,
Class_2)
# Tidy Data
library(tidyr)
TeacherClassroom - Assignments %%
gather(x, Classroom, Class_1,Class_2) %%
select(-x) %%
mutate(Teacher.ID = as.character(Teacher.ID))
# Merge
DF_Classrooms - TeacherClassroom %% full_join(Teachers, by=Teacher.ID)
%% full_join(Schools, by=School.ID)
On Tue, Jul 14, 2015 at 10:35 AM Ignacio Martinez ignaci...@gmail.com
wrote:
Genial Carlos! Tu codigo produce lo que quiero!
Estoy tratando de entender cada paso y hacer algunos cambios. Mi problema
es con como usar `str_plit_fixed`. Con tu codigo tengo eso:
separoPairs - as.data.frame(str_split_fixed(AllpairsTmp, , 6))
head(separoPairs)
V1 V2 V3 V4 V5 V6
1 e1 g1 c1 e2 g1 c1
2 e1 g1 c1 e3 g1 c1
3 e1 g1 c1 e4 g1 c1
4 e1 g1 c1 e5 g1 c1
5 e1 g1 c1 e6 g1 c1
6 e1 g1 c1 e7 g1 c1
V1 y V4 son el nombre de las escuelas, V2 y V5 del grado y V3 y V6 de la
division. Yo hice unos cambios para tener datos un poco mas complejos, pero
como resultado inintencional no puedo producir `separoPairs` Esto es lo que
mi codigo produce:
head(separoPairs) V1 V2 V3V4 V5 V6
1 Aslamy School 3 grade A Maruyama School 3 grade A
2 Aslamy School 3 grade A Smith School 3 grade A
3 Aslamy School 3 grade A Linares School 3 grade A
4 Aslamy School 3 grade A Dieyleh School 3 grade A
5 Aslamy School 3 grade A Hernandez School 3 grade A
6 Aslamy School 3 grade A Padgett School 3 grade A
Se puede