It seems we're sticking with the C memcpy for a while (which does the overlap
check and logging), but now we're missing out on the potential asm speedup.
Let's try the best of both worlds by having the C memcpy call into memmove.
Yes, it'll do another direction test, but then it will go zip zoom fast.
On certain somewhat popular architectures it can skip the alignment checks,
for instance.


Index: string/memcpy.c
===================================================================
RCS file: /cvs/src/lib/libc/string/memcpy.c,v
retrieving revision 1.2
diff -u -p -r1.2 memcpy.c
--- string/memcpy.c     31 Aug 2015 02:53:57 -0000      1.2
+++ string/memcpy.c     5 Sep 2016 10:05:50 -0000
@@ -36,26 +36,14 @@
 #include <syslog.h>
 
 /*
- * sizeof(word) MUST BE A POWER OF TWO
- * SO THAT wmask BELOW IS ALL ONES
- */
-typedef        long word;              /* "word" used for optimal copy speed */
-
-#define        wsize   sizeof(word)
-#define        wmask   (wsize - 1)
-
-/*
  * Copy a block of memory, not handling overlap.
  */
 void *
-memcpy(void *dst0, const void *src0, size_t length)
+memcpy(void *dst, const void *src, size_t length)
 {
-       char *dst = dst0;
-       const char *src = src0;
-       size_t t;
 
        if (length == 0 || dst == src)          /* nothing to do */
-               goto done;
+               return dst;
 
        if ((dst < src && dst + length > src) ||
            (src < dst && src + length > dst)) {
@@ -65,36 +53,7 @@ memcpy(void *dst0, const void *src0, siz
                abort();
        }
 
-       /*
-        * Macros: loop-t-times; and loop-t-times, t>0
-        */
-#define        TLOOP(s) if (t) TLOOP1(s)
-#define        TLOOP1(s) do { s; } while (--t)
+       return memmove(dst, src, length);
 
-       /*
-        * Copy forward.
-        */
-       t = (long)src;  /* only need low bits */
-       if ((t | (long)dst) & wmask) {
-               /*
-                * Try to align operands.  This cannot be done
-                * unless the low bits match.
-                */
-               if ((t ^ (long)dst) & wmask || length < wsize)
-                       t = length;
-               else
-                       t = wsize - (t & wmask);
-               length -= t;
-               TLOOP1(*dst++ = *src++);
-       }
-       /*
-        * Copy whole words, then mop up any trailing bytes.
-        */
-       t = length / wsize;
-       TLOOP(*(word *)dst = *(word *)src; src += wsize; dst += wsize);
-       t = length & wmask;
-       TLOOP(*dst++ = *src++);
-done:
-       return (dst0);
 }
 DEF_STRONG(memcpy);

Reply via email to