On 12/29/2011 3:19 AM, Vladimir Panteleev wrote:
I'd like to invite you to translate Daniel Vik's C memcpy implementation to D:
http://www.danielvik.com/2010/02/fast-memcpy-in-c.html

Challenge accepted.
------------------------
/********************************************************************
 ** File:     memcpy.c
 **
 ** Copyright (C) 1999-2010 Daniel Vik
 **
 ** This software is provided 'as-is', without any express or implied
 ** warranty. In no event will the authors be held liable for any
 ** damages arising from the use of this software.
 ** Permission is granted to anyone to use this software for any
 ** purpose, including commercial applications, and to alter it and
 ** redistribute it freely, subject to the following restrictions:
 **
 ** 1. The origin of this software must not be misrepresented; you
 **    must not claim that you wrote the original software. If you
 **    use this software in a product, an acknowledgment in the
 **    product documentation would be appreciated but is not
 **    required.
 **
 ** 2. Altered source versions must be plainly marked as such, and
 **    must not be misrepresented as being the original software.
 **
 ** 3. This notice may not be removed or altered from any source
 **    distribution.
 **
 **
 ** Description: Implementation of the standard library function memcpy.
 **             This implementation of memcpy() is ANSI-C89 compatible.
 **
 **             The following configuration options can be set:
 **
 **           LITTLE_ENDIAN   - Uses processor with little endian
 **                             addressing. Default is big endian.
 **
 **           PRE_INC_PTRS    - Use pre increment of pointers.
 **                             Default is post increment of
 **                             pointers.
 **
 **           INDEXED_COPY    - Copying data using array indexing.
 **                             Using this option, disables the
 **                             PRE_INC_PTRS option.
 **
 **           MEMCPY_64BIT    - Compiles memcpy for 64 bit
 **                             architectures
 **
 **
 ** Best Settings:
 **
 ** Intel x86:  LITTLE_ENDIAN and INDEXED_COPY
 **
 *******************************************************************/

module memcpy;


/********************************************************************
 ** Configuration definitions.
 *******************************************************************/

// The shift-and-merge word copy depends on the target's byte order, so
// take it from the compiler's predefined LittleEndian version identifier
// instead of hard-coding it. When LITTLE_ENDIAN is not set, the big endian
// code paths are compiled.
version (LittleEndian)
{
    version = LITTLE_ENDIAN;
}

// Copy the unrolled groups of words using array indexing.
version = INDEXED_COPY;


/********************************************************************
 ** Includes for size_t definition
 *******************************************************************/



/********************************************************************
 ** Typedefs
 *******************************************************************/

// Fixed-width unsigned integer names used throughout this module.
alias ubyte  UInt8;
alias ushort UInt16;
alias uint   UInt32;
alias ulong  UInt64;

// UIntN is the word type used for bulk copying and TYPE_WIDTH is its size
// in bytes: 8 when pointers are 64 bits wide, 4 otherwise.
static if ((void*).sizeof == 8)
{
    enum TYPE_WIDTH = 8;
    alias UInt64 UIntN;
}
else
{
    enum TYPE_WIDTH = 4;
    alias UInt32 UIntN;
}


/********************************************************************
 ** Remove definitions when INDEXED_COPY is defined.
 *******************************************************************/

//#if defined (INDEXED_COPY)
//#if defined (PRE_INC_PTRS)
//#undef PRE_INC_PTRS
//#endif /*PRE_INC_PTRS*/
//#endif /*INDEXED_COPY*/



/********************************************************************
 ** Definitions for pre and post increment of pointers.
 *******************************************************************/

// INDEXED_COPY takes precedence over PRE_INC_PTRS (see the configuration
// notes in the file header): the pre increment helpers are only selected
// when indexed copying is not enabled.
version (INDEXED_COPY)
{
}
else version (PRE_INC_PTRS)
{
    version = USE_PRE_INC_PTRS;
}

version (USE_PRE_INC_PTRS)
{
    // Step the byte pointer back by one so the first pre increment lands
    // on the first byte.
    void START_VAL(ref UInt8* x)      { x--; }
    // Advance the pointer, then yield a reference to the element it now
    // points at.
    ref T INC_VAL(T)(ref T* x)        { return *++x; }
    // Byte pointer at p plus o bytes plus one word.
    UInt8* CAST_TO_U8(void* p, int o) { return cast(UInt8*)p + o + TYPE_WIDTH; }
    // Value of the low address bits of dest at which the byte-wise
    // alignment loop stops.
    enum WHILE_DEST_BREAK  = (TYPE_WIDTH - 1);
    // Byte offset added to the byte pointers when forming word pointers.
    enum PRE_LOOP_ADJUST   = -(TYPE_WIDTH - 1);
    // Offset added to the source address before its alignment is tested.
    enum PRE_SWITCH_ADJUST = 1;
}
else
{
    // Nothing to adjust when using post increment.
    void START_VAL(UInt8* x)          { }
    // Yield a reference to the current element, then advance the pointer.
    ref T INC_VAL(T)(ref T* x)        { return *x++; }
    // Byte pointer at p plus o bytes.
    UInt8* CAST_TO_U8(void* p, int o) { return cast(UInt8*)p + o; }
    // Value of the low address bits of dest at which the byte-wise
    // alignment loop stops.
    enum WHILE_DEST_BREAK  = 0;
    // Byte offset added to the byte pointers when forming word pointers.
    enum PRE_LOOP_ADJUST   = 0;
    // Offset added to the source address before its alignment is tested.
    enum PRE_SWITCH_ADJUST = 0;
}







/********************************************************************
 **
 ** void *memcpy(void *dest, const void *src, size_t count)
 **
 ** Args:     dest        - pointer to destination buffer
 **           src         - pointer to source buffer
 **           count       - number of bytes to copy
 **
 ** Return:   A pointer to destination buffer
 **
 ** Purpose:  Copies count bytes from src to dest.
 **           No overlap check is performed.
 **
 *******************************************************************/

void *memcpy(void *dest, const void *src, size_t count)
{
    // Outline: counts below 8 are copied byte by byte. Otherwise bytes are
    // copied until dest is word aligned, then whole UIntN words are copied
    // (merging shifted source words when src is not aligned the same way),
    // and finally the trailing bytes are copied. Returns dest.

    // Byte cursors, advanced as data is copied.
    auto dst8 = cast(UInt8*)dest;
    auto src8 = cast(UInt8*)src;

    // Word cursors and shift/merge state shared by the nested helpers.
    UIntN* dstN;
    UIntN* srcN;
    UIntN dstWord;
    UIntN srcWord;

    /********************************************************************
     ** Helpers for copying words of different alignment.
     ** Uses incrementing pointers.
     *******************************************************************/

    // Copy one word and advance both word cursors.
    void CP_INCR() {
        INC_VAL(dstN) = INC_VAL(srcN);
    }

    // Build one destination word from the remainder of the previously
    // loaded source word and the start of the next one, then store it.
    // shl and shr are bit counts.
    void CP_INCR_SH(int shl, int shr) {
        version (LITTLE_ENDIAN)
        {
            dstWord   = srcWord >> shl;
            srcWord   = INC_VAL(srcN);
            dstWord  |= srcWord << shr;
            INC_VAL(dstN) = dstWord;
        }
        else
        {
            dstWord   = srcWord << shl;
            srcWord   = INC_VAL(srcN);
            dstWord  |= srcWord >> shr;
            INC_VAL(dstN) = dstWord;
        }
    }



    /********************************************************************
     ** Helpers for copying words of different alignment.
     ** Uses array indexes.
     *******************************************************************/

    // Copy the word at index idx; the word cursors are not moved.
    void CP_INDEX(size_t idx) {
        dstN[idx] = srcN[idx];
    }

    // Indexed variant of CP_INCR_SH: same merge, reading srcN[x] and
    // writing dstN[x] without moving the word cursors.
    void CP_INDEX_SH(size_t x, int shl, int shr) {
        version (LITTLE_ENDIAN)
        {
            dstWord   = srcWord >> shl;
            srcWord   = srcN[x];
            dstWord  |= srcWord << shr;
            dstN[x]  = dstWord;
        }
        else
        {
            dstWord   = srcWord << shl;
            srcWord   = srcN[x];
            dstWord  |= srcWord >> shr;
            dstN[x]  = dstWord;
        }
    }


    /********************************************************************
     ** Helpers for copying words of different alignment.
     ** Uses incrementing pointers or array indexes depending on
     ** configuration.
     *******************************************************************/

    version (INDEXED_COPY)
    {
        void CP(size_t idx) { CP_INDEX(idx); }
        void CP_SH(size_t idx, int shl, int shr) { CP_INDEX_SH(idx, shl, shr); }

        // Indexed copies leave the cursors in place, so advance them here.
        void INC_INDEX(T)(ref T* p, size_t o) { p += o; }
    }
    else
    {
        void CP(size_t idx) { CP_INCR(); }
        void CP_SH(size_t idx, int shl, int shr) { CP_INCR_SH(shl, shr); }

        // The incrementing copies already advanced the cursors.
        void INC_INDEX(T)(T* p, size_t o) { }
    }


    // Copy `remaining` bytes (0 to 7) through the byte cursors. Any other
    // value copies nothing.
    void COPY_REMAINING(size_t remaining) {
        START_VAL(dst8);
        START_VAL(src8);

        // Each case copies one byte and continues with the next lower case;
        // D requires this fall-through to be spelled out with `goto case`.
        switch (remaining) {
        case 7: INC_VAL(dst8) = INC_VAL(src8); goto case;
        case 6: INC_VAL(dst8) = INC_VAL(src8); goto case;
        case 5: INC_VAL(dst8) = INC_VAL(src8); goto case;
        case 4: INC_VAL(dst8) = INC_VAL(src8); goto case;
        case 3: INC_VAL(dst8) = INC_VAL(src8); goto case;
        case 2: INC_VAL(dst8) = INC_VAL(src8); goto case;
        case 1: INC_VAL(dst8) = INC_VAL(src8); goto case;
        case 0:
        default: break;
        }
    }

    // Word copy used when the source address (after PRE_SWITCH_ADJUST) is
    // word aligned once dest has been aligned.
    void COPY_NO_SHIFT() {
        dstN = cast(UIntN*)(dst8 + PRE_LOOP_ADJUST);
        srcN = cast(UIntN*)(src8 + PRE_LOOP_ADJUST);
        size_t length = count / TYPE_WIDTH;

        // Copy single words until the word count is a multiple of 8.
        while (length & 7) {
            CP_INCR();
            length--;
        }

        length /= 8;

        // Copy the rest in unrolled groups of 8 words.
        while (length--) {
            CP(0);
            CP(1);
            CP(2);
            CP(3);
            CP(4);
            CP(5);
            CP(6);
            CP(7);

            INC_INDEX(dstN, 8);
            INC_INDEX(srcN, 8);
        }

        // Hand the positions back to the byte cursors for the tail.
        src8 = CAST_TO_U8(srcN, 0);
        dst8 = CAST_TO_U8(dstN, 0);

        COPY_REMAINING(count & (TYPE_WIDTH - 1));
    }


    // Word copy used when the source is `shift` bytes past a word boundary:
    // source words are read from word-aligned addresses and adjacent words
    // are merged with shifts to form each destination word.
    void COPY_SHIFT(int shift) {
        dstN  = cast(UIntN*)(((cast(UIntN)dst8) + PRE_LOOP_ADJUST) &
                                 ~(TYPE_WIDTH - 1));
        srcN  = cast(UIntN*)(((cast(UIntN)src8) + PRE_LOOP_ADJUST) &
                                 ~(TYPE_WIDTH - 1));
        size_t length  = count / TYPE_WIDTH;
        // Pre-load the first source word; each merge step needs the
        // previously loaded word as well as the next one.
        srcWord = INC_VAL(srcN);

        // Copy single words until the word count is a multiple of 8.
        while (length & 7) {
            CP_INCR_SH(8 * shift, 8 * (TYPE_WIDTH - shift));
            length--;
        }

        length /= 8;

        // Copy the rest in unrolled groups of 8 words.
        while (length--) {
            CP_SH(0, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(1, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(2, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(3, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(4, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(5, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(6, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(7, 8 * shift, 8 * (TYPE_WIDTH - shift));

            INC_INDEX(dstN, 8);
            INC_INDEX(srcN, 8);
        }

        // srcN was rounded down to a word boundary and has loaded one word
        // ahead, so the byte cursor is placed (shift - TYPE_WIDTH) bytes
        // from it.
        src8 = CAST_TO_U8(srcN, (shift - TYPE_WIDTH));
        dst8 = CAST_TO_U8(dstN, 0);

        COPY_REMAINING(count & (TYPE_WIDTH - 1));
    }


    // Small copies are done byte by byte.
    if (count < 8) {
        COPY_REMAINING(count);
        return dest;
    }

    START_VAL(dst8);
    START_VAL(src8);

    // Copy single bytes until the destination reaches the alignment the
    // word loops expect.
    while ((cast(UIntN)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) {
        INC_VAL(dst8) = INC_VAL(src8);
        count--;
    }

    // Dispatch on the source offset within a word. The value is masked
    // with (TYPE_WIDTH - 1), so offsets 4..7 can only occur when the word
    // is wider than 4 bytes.
    final switch (((cast(UIntN)src8) + PRE_SWITCH_ADJUST) & (TYPE_WIDTH - 1)) {
    case 0: COPY_NO_SHIFT(); break;
    case 1: COPY_SHIFT(1);   break;
    case 2: COPY_SHIFT(2);   break;
    case 3: COPY_SHIFT(3);   break;
    static if (TYPE_WIDTH > 4)
    {
        case 4: COPY_SHIFT(4);   break;
        case 5: COPY_SHIFT(5);   break;
        case 6: COPY_SHIFT(6);   break;
        case 7: COPY_SHIFT(7);   break;
    }
    }

    return dest;
}

Reply via email to