Hervé Pagès wrote:
[...]
Code:
=======================================================================
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

void memcpy_with_recycling_of_src(char *dest, size_t dest_nblocks,
                  const char *src, size_t src_nblocks,
                  size_t blocksize)
{
    int i, imax, q;
    size_t src_size;

    imax = dest_nblocks - src_nblocks;
    src_size = src_nblocks * blocksize;
    for (i = 0; i <= imax; i += src_nblocks) {
        memcpy(dest, src, src_size);
        dest += src_size;
        i += src_nblocks;
          ^^^^^^^^^^^^^^^^^
          //i += src_nblocks;

oops, take this out!

Of course copy_ints is twice slower now but is still
about 2.5x faster than copy_ints2 (the copyVector
way) for copying a length 3 vector recycled 3.3 million
times. For a length 1000 vector being recycled 25 times,
it's about 17x faster.

Cheers,
H.

    }
    q = dest_nblocks - i;
    if (q > 0)
        memcpy(dest, src, q * blocksize);
    return;
}

void copy_ints(int *dest, int dest_length,
               const int *src, int src_length)
{
    memcpy_with_recycling_of_src((char *) dest, dest_length,
                     (char *) src, src_length,
                     sizeof(int));
}

/* the copyVector() way */
void copy_ints2(int *dest, int dest_length,
                const int *src, int src_length)
{
    int i;

    for (i = 0; i < dest_length; i++)
        dest[i] = src[i % src_length];
}

int main()
{
    int nt, ns, kmax, k;
    int *t, *s;

    nt = 3;
    ns = 10000000;
    kmax = 100;
    t = (int *) malloc(nt * sizeof(int));
    s = (int *) malloc(ns * sizeof(int));
    for (k = 0; k < kmax; k++)
        //copy_ints(s, ns, t, nt);
        copy_ints2(s, ns, t, nt);
    return 0;
}

Note that the function that actually does the job is
memcpy_with_recycling_of_src(). It can be reused for copying
vectors with elements of an arbitrary size.

Cheers,
H.


Cheers,
H.


Matthew

"William Dunlap" <wdun...@tibco.com> wrote in message news:77eb52c6dd32ba4d87471dcd70c8d70002ce6...@na-pa-vbe03.na.tibco.com...
If I were worried about the time this loop takes,
I would avoid using i%nt.  For the attached C code
compile with gcc 4.3.3 with -O2 I get
  > # INTEGER() in loop
  > system.time( r1 <- .Call("my_rep1", 1:3, 1e7) )
     user  system elapsed
    0.060   0.012   0.071

  > # INTEGER() before loop
  > system.time( r2 <- .Call("my_rep2", 1:3, 1e7) )
     user  system elapsed
    0.076   0.008   0.086

  > # replace i%src_length in loop with j=0 before loop and
  > #    if(++j==src_length) j=0 ;
  > # in the loop.
  > system.time( r3 <- .Call("my_rep3", 1:3, 1e7) )
     user  system elapsed
    0.024   0.028   0.050
  > identical(r1,r2) && identical(r2,r3)
  [1] TRUE

The C code is:
#define USE_RINTERNALS /* pretend we are in the R kernel */
#include <R.h>
#include <Rinternals.h>


SEXP my_rep1(SEXP s_src, SEXP s_dest_length)
{
    int src_length = length(s_src) ;
    int dest_length = asInteger(s_dest_length) ;
    int i,j ;
    SEXP s_dest ;
    PROTECT(s_dest = allocVector(INTSXP, dest_length)) ;
    if(TYPEOF(s_src) != INTSXP) error("src must be integer data") ;
    for(i=0;i<dest_length;i++) {
        INTEGER(s_dest)[i] = INTEGER(s_src)[i % src_length] ;
    }
    UNPROTECT(1) ;
    return s_dest ;
}
SEXP my_rep2(SEXP s_src, SEXP s_dest_length)
{
    int src_length = length(s_src) ;
    int dest_length = asInteger(s_dest_length) ;
    int *psrc = INTEGER(s_src) ;
    int *pdest ;
    int i ;
    SEXP s_dest ;
    PROTECT(s_dest = allocVector(INTSXP, dest_length)) ;
    pdest = INTEGER(s_dest) ;
    if(TYPEOF(s_src) != INTSXP) error("src must be integer data") ;
    /* end of boilerplate */
    for(i=0;i<dest_length;i++) {
        pdest[i] = psrc[i % src_length] ;
    }
    UNPROTECT(1) ;
    return s_dest ;
}
SEXP my_rep3(SEXP s_src, SEXP s_dest_length)
{
    int src_length = length(s_src) ;
    int dest_length = asInteger(s_dest_length) ;
    int *psrc = INTEGER(s_src) ;
    int *pdest ;
    int i,j ;
    SEXP s_dest ;
    PROTECT(s_dest = allocVector(INTSXP, dest_length)) ;
    pdest = INTEGER(s_dest) ;
    if(TYPEOF(s_src) != INTSXP) error("src must be integer data") ;
    /* end of boilerplate */
    for(j=0,i=0;i<dest_length;i++) {
        *pdest++ = psrc[j++] ;
        if (j==src_length) {
            j = 0 ;
        }
    }
    UNPROTECT(1) ;
    return s_dest ;
}

Bill Dunlap
Spotfire, TIBCO Software
wdunlap tibco.com

-----Original Message-----
From: r-devel-boun...@r-project.org
[mailto:r-devel-boun...@r-project.org] On Behalf Of Romain Francois
Sent: Wednesday, April 21, 2010 12:32 PM
To: Matthew Dowle
Cc: r-de...@stat.math.ethz.ch
Subject: Re: [Rd] suggestion how to use memcpy in duplicate.c

Le 21/04/10 17:54, Matthew Dowle a écrit :
From copyVector in duplicate.c :
void copyVector(SEXP s, SEXP t)
{
     int i, ns, nt;
     nt = LENGTH(t);
     ns = LENGTH(s);
     switch (TYPEOF(s)) {
...
     case INTSXP:
     for (i = 0; i<  ns; i++)
         INTEGER(s)[i] = INTEGER(t)[i % nt];
     break;
...

could that be replaced with :

     case INTSXP:
     for (i=0; i<ns/nt; i++)
         memcpy((char *)DATAPTR(s)+i*nt*sizeof(int), (char
*)DATAPTR(t),
nt*sizeof(int));
     break;
or at least with something like this:

int* p_s = INTEGER(s) ;
int* p_t = INTEGER(t) ;
for( i=0 ; i < ns ; i++){
p_s[i] = p_t[i % nt];
}

since expanding the INTEGER macro over and over has a price.

and similar for the other types in copyVector.  This won't
help regular
vector copies, since those seem to be done by the
DUPLICATE_ATOMIC_VECTOR
macro, see next suggestion below, but it should help
copyMatrix which calls
copyVector, scan.c which calls copyVector on three lines,
dcf.c (once) and
dounzip.c (once).

For the DUPLICATE_ATOMIC_VECTOR macro there is already a
comment next to it
:

     <FIXME>: surely memcpy would be faster here?

which seems to refer to the for loop  :

     else { \
     int __i__; \
     type *__fp__ = fun(from), *__tp__ = fun(to); \
     for (__i__ = 0; __i__<  __n__; __i__++) \
       __tp__[__i__] = __fp__[__i__]; \
   } \

Could that loop be replaced by the following ?

    else { \
    memcpy((char *)DATAPTR(to), (char *)DATAPTR(from),
__n__*sizeof(type)); \
    }\

In the data.table package, dogroups.c uses this technique,
so the principle
is tested and works well so far.

Are there any road blocks preventing this change, or is
anyone already
working on it ?  If not then I'll try and test it (on
Ubuntu 32bit) and
submit patch with timings, as before.  Comments/pointers
much appreciated.
Matthew

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel



--
Romain Francois
Professional R Enthusiast
+33(0) 6 28 91 30 30
http://romainfrancois.blog.free.fr
|- http://bit.ly/9aKDM9 : embed images in Rd documents
|- http://tr.im/OIXN : raster images and RImageJ
|- http://tr.im/OcQe : Rcpp 0.7.7

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel


______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel



--
Hervé Pagès

Program in Computational Biology
Division of Public Health Sciences
Fred Hutchinson Cancer Research Center
1100 Fairview Ave. N, M2-B876
P.O. Box 19024
Seattle, WA 98109-1024

E-mail: hpa...@fhcrc.org
Phone:  (206) 667-5791
Fax:    (206) 667-1319

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply via email to