Hello,

I have no experience with packages foreach and doMC.
But I believe that parallel computing only pays off if the datasets are really large, because of the setup time. Maybe "thousands of observations" is not that large.

Rui Barradas

Em 01-09-2013 22:21, Ignacio Martinez escreveu:
Thanks a lot Rui. Loops make sense to me. I made one modification to your
code. I have thousands of observations, so I would like to run it in
parallel. This is my reproducible example:

# Count video actions per type for user X in three time windows.
# Reads the global data frame videoLectureActions (columns username,
# eventTimestamp and type) and returns a data frame holding anon_ID = userX
# followed by 18 counts: six action types for each of the three windows.
DataVideoActionT <- function (userX, Time1, Time2, Time3){
   # Action labels; the type column stores them padded with one space each side
   actionNames <- c("error", "pause", "play", "ratechange", "seeked", "stalled")

   # Events that belong to user X
   userEvents <- subset(videoLectureActions, username==userX)

   # Window 1: before the first attempt
   beforeFirst <- subset(userEvents, eventTimestamp<Time1)
   # Window 2: after the first attempt and before the best attempt
   beforeBest <- subset(userEvents, eventTimestamp>Time1 & eventTimestamp<Time2)
   # Window 3: after the first attempt and before the last attempt
   beforeLast <- subset(userEvents, eventTimestamp>Time1 & eventTimestamp<Time3)
   windows <- list(beforeFirst, beforeBest, beforeLast)

   # Columns are filled window by window: error1 ... stalled1, error2 ...
   counts <- list(anon_ID=userX)
   for (w in seq_along(windows)) {
      for (a in actionNames) {
         counts[[paste0(a, w)]] <- sum(windows[[w]]$type == paste0(" ", a, " "))
      }
   }
   do.call(data.frame, counts)
}

# Example video-action table (10 rows, 12 character columns) read by
# DataVideoActionT() through the columns username, eventTimestamp and type.
# Every string literal sits on a single line: a timestamp or a padded type
# label such as " pause " must not contain a line break, otherwise the
# equality tests on the type column silently stop matching.
videoLectureActions <- structure(list(
   username = c("exampleID1", "exampleID1", "exampleID1",
                "exampleID2", "exampleID2", "exampleID2",
                "exampleID3", "exampleID3", "exampleID3", "exampleID3"),
   currentTime = c("103.701247", "103.701247", "107.543877", "107.543877",
                   "116.456507", "116.456507", "119.987188", "177.816693",
                   "183.417124", "183.417124"),
   playbackRate = c("null", "null", "null", "null", "null",
                    "null", "null", "null", "null", "null"),
   pause = c("true", "false", "true", "false", "true",
             "false", "true", "false", "true", "false"),
   error = c("null", "null", "null", "null", "null",
             "null", "null", "null", "null", "null"),
   networkState = c("1", "1", "1", "1", "1", "1", "1", "1", "1", "1"),
   readyState = c("4", "4", "4", "4", "4", "4", "4", "4", "4", "4"),
   lectureID = c("exampleLectureID1", "exampleLectureID1",
                 "exampleLectureID1", "exampleLectureID1",
                 "exampleLectureID1", "exampleLectureID1",
                 "exampleLectureID1", "exampleLectureID1",
                 "exampleLectureID1", "exampleLectureID1"),
   eventTimestamp = c("2013-03-04 18:51:49", "2013-03-04 18:51:50",
                      "2013-03-04 18:51:54", "2013-03-04 18:51:56",
                      "2013-03-04 18:52:05", "2013-03-04 18:52:07",
                      "2013-03-04 18:52:11", "2013-03-04 18:59:17",
                      "2013-03-04 18:59:23", "2013-03-04 18:59:31"),
   initTimestamp = c("2013-03-04 18:44:15", "2013-03-04 18:44:15",
                     "2013-03-04 18:44:15", "2013-03-04 18:44:15",
                     "2013-03-04 18:44:15", "2013-03-04 18:44:15",
                     "2013-03-04 18:44:15", "2013-03-04 18:44:15",
                     "2013-03-04 18:44:15", "2013-03-04 18:44:15"),
   type = c(" pause ", " play ", " pause ", " play ", " pause ",
            " play ", " pause ", " play ", " pause ", " play "),
   prevTime = c("103.701247 ", "103.701247 ", "107.543877 ", "107.543877 ",
                "116.456507 ", "116.456507 ", "119.987188 ", "177.816693 ",
                "183.417124 ", "183.417124 ")
), row.names = c(1L, 2L, 5L, 6L, 17L, 21L, 28L, 936L, 957L, 988L),
class = "data.frame")
# Example per-user table: three users with their grade columns, submission
# counts and three POSIXct time stamps (Time1, TimeM, TimeL).
data <- structure(
   list(
      anon_ID = c("exampleID1", "exampleID2", "exampleID3"),
      maxGrade = c(10, 5, 10),
      firstGrade = c(10, 5, 8),
      lastGrade = c(10, 5, 10),
      total_submissions = c(1L, 1L, 3L),
      Time1 = .POSIXct(c(1361993741, 1362356090, 1362357401), tz = ""),
      TimeM = .POSIXct(c(1361993741, 1362356090, 1362492744), tz = ""),
      TimeL = .POSIXct(c(1361993741, 1362356090, 1362492744), tz = "")
   ),
   row.names = c(NA, 3L),
   class = "data.frame"
)

library(foreach)
library(doMC)
registerDoMC(2)  #change the 2 to your number of CPU cores

# Build one row of counts per user in `data` and stack the rows into `test`.
#
# foreach() returns the value of its body for every iteration, so the results
# are taken from that return value. Assigning to an outer object from inside
# the body (res[[i]] <- ...) only works with %do%: under %dopar% the body runs
# in worker processes and such assignments never reach this session.
# Storing the foreach() result in `res` also keeps it from being printed at
# the console.
n <- nrow(data)
res <- foreach(i = seq_len(n), .verbose = FALSE) %dopar% {
   with(data, DataVideoActionT(anon_ID[i], Time1[i], TimeM[i], TimeL[i]))
}
# res is a list with one data frame per user; bind them in a single step
test <- do.call(rbind, res)

I have 2 questions.

1. How can I make foreach not print to the console?

2. I want to run this in parallel. If I change %do% to %dopar%, the code
stops working: instead of getting test with 3 observations and 19 variables,
I get a 2x1 character matrix.


Thanks!



On Sun, Sep 1, 2013 at 3:00 PM, Rui Barradas <ruipbarra...@sapo.pt> wrote:

Hello,

Your example doesn't really run, but for what I've seen, if your second
data frame is named dat2, something along the lines of

# Call DataVideoActionT() once per row of dat2 and keep every result.
n <- nrow(dat2)
# Preallocate an empty list with one slot per row
res <- vector("list", n)
# seq_len(n) yields no iterations when dat2 has zero rows (1:n would give 1, 0)
for(i in seq_len(n)){
         res[[i]] <- with(dat2, DataVideoActionT(anon_ID[i], Time1[i],
                                                 TimeM[i], TimeL[i]))
}

# Stack the per-row results into a single data frame
do.call(rbind, res)


Rui Barradas

Em 01-09-2013 17:40, Ignacio Martinez escreveu:

I hope this reproducible example helps to explain what I'm trying to do.

This is the function:

# Make Data Frame for video actions between given times for user X
#
# Reads the global data frame videoLectureActions (columns username,
# eventTimestamp and type) and counts, for user X, the rows of each action
# type (" error ", " pause ", " play ", " ratechange ", " seeked ",
# " stalled ") in three time windows.
#
# Arguments:
#   userX  value compared with videoLectureActions$username
#   Time1  upper bound of window 1 (events before Time1)
#   Time2  upper bound of window 2 (events after Time1 and before Time2)
#   Time3  upper bound of window 3 (events after Time1 and before Time3)
#
# Returns a data frame with anon_ID = userX followed by the six counts of
# each window (column suffixes 1, 2 and 3), 19 columns in total.
DataVideoActionT <- function (userX, Time1, Time2, Time3){
    #Get data for user X
    videoActionsX<-subset(videoLectureActions, username==userX)
    #Time1 = before first attempt
    videoActionsX_T1<-subset(videoActionsX, eventTimestamp<Time1)
    #Time2 = after first attempt and before best attempt
    videoActionsX_T2<-subset(videoActionsX, eventTimestamp<Time2 &
                                 eventTimestamp>Time1)
    #Time3 = after first attempt and before last attempt
    videoActionsX_T3<-subset(videoActionsX, eventTimestamp<Time3 &
                                 eventTimestamp>Time1)

    # The type labels carry one leading and one trailing space
    error1 = sum(videoActionsX_T1$type==" error ")
    pause1 = sum(videoActionsX_T1$type==" pause ")
    play1 = sum(videoActionsX_T1$type==" play ")
    ratechange1 = sum(videoActionsX_T1$type==" ratechange ")
    seeked1 = sum(videoActionsX_T1$type==" seeked ")
    stalled1 = sum(videoActionsX_T1$type==" stalled ")

    error2 = sum(videoActionsX_T2$type==" error ")
    pause2 = sum(videoActionsX_T2$type==" pause ")
    play2 = sum(videoActionsX_T2$type==" play ")
    ratechange2 = sum(videoActionsX_T2$type==" ratechange ")
    seeked2 = sum(videoActionsX_T2$type==" seeked ")
    stalled2 = sum(videoActionsX_T2$type==" stalled ")

    error3 = sum(videoActionsX_T3$type==" error ")
    pause3 = sum(videoActionsX_T3$type==" pause ")
    play3 = sum(videoActionsX_T3$type==" play ")
    ratechange3 = sum(videoActionsX_T3$type==" ratechange ")
    seeked3 = sum(videoActionsX_T3$type==" seeked ")
    stalled3 = sum(videoActionsX_T3$type==" stalled ")

    # One column per (action type, window) pair, preceded by the user id
    data<-data.frame(anon_ID=userX,
                     error1 = error1,
                     pause1 = pause1,
                     play1 = play1,
                     ratechange1 = ratechange1,
                     seeked1=seeked1,
                     stalled1=stalled1,
                     error2 = error2,
                     pause2 = pause2,
                     play2 = play2,
                     ratechange2 = ratechange2,
                     seeked2 =seeked2,
                     stalled2 = stalled2,
                     error3 = error3,
                     pause3 = pause3,
                     play3 = play3,
                     ratechange3 = ratechange3,
                     seeked3 = seeked3,
                     stalled3 = stalled3)
    return(data)
}

This is the videoLectureActions data frame:

# Example video-action table (10 rows, 12 character columns) with the columns
# username, eventTimestamp and type that DataVideoActionT() reads.
# Every string literal sits on a single line: a timestamp or a padded type
# label such as " pause " must not contain a line break.
structure(list(
    username = c("exampleID1", "exampleID1", "exampleID1",
                 "exampleID2", "exampleID2", "exampleID2",
                 "exampleID3", "exampleID3", "exampleID3", "exampleID3"),
    currentTime = c("103.701247", "103.701247", "107.543877", "107.543877",
                    "116.456507", "116.456507", "119.987188", "177.816693",
                    "183.417124", "183.417124"),
    playbackRate = c("null", "null", "null", "null", "null",
                     "null", "null", "null", "null", "null"),
    pause = c("true", "false", "true", "false", "true",
              "false", "true", "false", "true", "false"),
    error = c("null", "null", "null", "null", "null",
              "null", "null", "null", "null", "null"),
    networkState = c("1", "1", "1", "1", "1", "1", "1", "1", "1", "1"),
    readyState = c("4", "4", "4", "4", "4", "4", "4", "4", "4", "4"),
    lectureID = c("exampleLectureID1", "exampleLectureID1",
                  "exampleLectureID1", "exampleLectureID1",
                  "exampleLectureID1", "exampleLectureID1",
                  "exampleLectureID1", "exampleLectureID1",
                  "exampleLectureID1", "exampleLectureID1"),
    eventTimestamp = c("2013-03-04 18:51:49", "2013-03-04 18:51:50",
                       "2013-03-04 18:51:54", "2013-03-04 18:51:56",
                       "2013-03-04 18:52:05", "2013-03-04 18:52:07",
                       "2013-03-04 18:52:11", "2013-03-04 18:59:17",
                       "2013-03-04 18:59:23", "2013-03-04 18:59:31"),
    initTimestamp = c("2013-03-04 18:44:15", "2013-03-04 18:44:15",
                      "2013-03-04 18:44:15", "2013-03-04 18:44:15",
                      "2013-03-04 18:44:15", "2013-03-04 18:44:15",
                      "2013-03-04 18:44:15", "2013-03-04 18:44:15",
                      "2013-03-04 18:44:15", "2013-03-04 18:44:15"),
    type = c(" pause ", " play ", " pause ", " play ", " pause ",
             " play ", " pause ", " play ", " pause ", " play "),
    prevTime = c("103.701247 ", "103.701247 ", "107.543877 ", "107.543877 ",
                 "116.456507 ", "116.456507 ", "119.987188 ", "177.816693 ",
                 "183.417124 ", "183.417124 ")
), row.names = c(1L, 2L, 5L, 6L, 17L, 21L, 28L, 936L, 957L, 988L),
class = "data.frame")



But with over 2000 observations.

And this is the other data frame

# Example per-user table (3 rows): anon_ID, three grade columns, the number of
# submissions and three POSIXct time stamps (Time1, TimeM, TimeL).
# row.names = c(NA, 3L) is the compact form for automatic row names 1..3.
structure(list(anon_ID = c("exampleID1", "exampleID2", "exampleID3" ),
maxGrade = c(10, 5, 10), firstGrade = c(10, 5, 8), lastGrade = c(10,
5, 10), total_submissions = c(1L, 1L, 3L), Time1 =
structure(c(1361993741, 1362356090, 1362357401), class = c("POSIXct",
"POSIXt"), tzone = ""), TimeM = structure(c(1361993741, 1362356090,
1362492744), class = c("POSIXct", "POSIXt"), tzone = ""), TimeL =
structure(c(1361993741, 1362356090, 1362492744), class = c("POSIXct",
"POSIXt"), tzone = "")), .Names = c("anon_ID", "maxGrade",
"firstGrade", "lastGrade", "total_submissions", "Time1", "TimeM",
"TimeL"), row.names = c(NA, 3L), class = "data.frame")


But with a lot more observations.


What I want to do is to call function(userX, Time1, Time2, Time3)
for all the users in the second data frame, where Time1=Time1,
Time2=TimeM, Time3=TimeL.


I hope that is more clear.


Thanks a lot for all the help!



On Sun, Sep 1, 2013 at 11:33 AM, Bert Gunter <gunter.ber...@gene.com>
wrote:

  Oh, another possibility is ?mapply, which I should have pointed out in my
previous reply. Sorry.

-- Bert


On Sun, Sep 1, 2013 at 8:30 AM, Bert Gunter <bgun...@gene.com> wrote:

  Rui et.al.:

But apply will not work if the data frame has columns of different
classes/types, as appears to be the case here. Viz, from ?apply:

"If X is not an array but an object of a class with a non-null dim
value (such as a data frame), apply attempts to coerce it to an array
via as.matrix if it is two-dimensional (e.g., a data frame) or via
as.array."

Simply looping by rows (via for() ) appears to be the simplest and
probably fastest solution. There are other ways via tapply() and
friends,
but these are also essentially loops and are likely to incur some
additional overhead.

All assuming I understand what the OP has requested, of course.

Cheers,

Bert


On Sun, Sep 1, 2013 at 7:31 AM, Rui Barradas <ruipbarra...@sapo.pt>
wrote:

  Hello,

Maybe you need apply, not lapply. It seems you want to apply() a
function to the first dimension of your data.frame, something like

apply(dat, 1, fun)  #apply by rows


Hope this helps,

Rui Barradas

Em 01-09-2013 15:00, Ignacio Martinez escreveu:

  I have a Data Frame that contains, among other things, the following
fields: userX, Time1, Time2, Time3. The number of observations is
2000.

I have a function that has as inputs userX, Time1, Time2, Time3 and
return
a data frame with 1 observation and 19 variables.

I want to apply that function to all the observations of the first
data
frame to make a new data frame with 2000 observations and 19
variables.

I thought about using lapply, but if I understand correctly, it only
takes
one variable.

Can somebody point me in the right direction?

Thanks!

          [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html


and provide commented, minimal, self-contained, reproducible code.


______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html


and provide commented, minimal, self-contained, reproducible code.




--

Bert Gunter
Genentech Nonclinical Biostatistics

Internal Contact Info:
Phone: 467-7374
Website:

http://pharmadevelopment.roche.com/index/pdb/pdb-functional-groups/pdb-biostatistics/pdb-ncb-home.htm





--

Bert Gunter
Genentech Nonclinical Biostatistics

Internal Contact Info:
Phone: 467-7374
Website:

http://pharmadevelopment.roche.com/index/pdb/pdb-functional-groups/pdb-biostatistics/pdb-ncb-home.htm






______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to