On 12/29/2011 11:47 AM, Walter Bright wrote:
On 12/29/2011 3:19 AM, Vladimir Panteleev wrote:
I'd like to invite you to translate Daniel Vik's C memcpy implementation to D:
http://www.danielvik.com/2010/02/fast-memcpy-in-c.html

Challenge accepted.

Here's another version that uses string mixins to ensure inlining of the COPY functions. There are no call instructions in the generated code. This should be as good as the C version using the same code generator.
----------------
/********************************************************************
 ** File:     memcpy.c
 **
 ** Copyright (C) 1999-2010 Daniel Vik
 **
 ** This software is provided 'as-is', without any express or implied
 ** warranty. In no event will the authors be held liable for any
 ** damages arising from the use of this software.
 ** Permission is granted to anyone to use this software for any
 ** purpose, including commercial applications, and to alter it and
 ** redistribute it freely, subject to the following restrictions:
 **
 ** 1. The origin of this software must not be misrepresented; you
 **    must not claim that you wrote the original software. If you
 **    use this software in a product, an acknowledgment in the
 **    use this software in a product, an acknowledgment in the
 **    product documentation would be appreciated but is not
 **    required.
 **
 ** 2. Altered source versions must be plainly marked as such, and
 **    must not be misrepresented as being the original software.
 **
 ** 3. This notice may not be removed or altered from any source
 **    distribution.
 **
 **
 ** Description: Implementation of the standard library function memcpy.
 **             This implementation of memcpy() is ANSI-C89 compatible.
 **
 **             The following configuration options can be set:
 **
 **           LITTLE_ENDIAN   - Uses processor with little endian
 **                             addressing. Default is big endian.
 **
 **           PRE_INC_PTRS    - Use pre increment of pointers.
 **                             Default is post increment of
 **                             pointers.
 **
 **           INDEXED_COPY    - Copying data using array indexing.
 **                             Using this option, disables the
 **                             PRE_INC_PTRS option.
 **
 **           MEMCPY_64BIT    - Compiles memcpy for 64 bit
 **                             architectures
 **
 **
 ** Best Settings:
 **
 ** Intel x86:  LITTLE_ENDIAN and INDEXED_COPY
 **
 *******************************************************************/

module memcpy;


/********************************************************************
 ** Configuration definitions.
 *******************************************************************/

version = LITTLE_ENDIAN;
version = INDEXED_COPY;


/********************************************************************
 ** Includes for size_t definition
 *******************************************************************/



/********************************************************************
 ** Typedefs
 *******************************************************************/

alias ubyte       UInt8;
alias ushort      UInt16;
alias uint        UInt32;
alias ulong       UInt64;

version (D_LP64)
{
    alias UInt64   UIntN;
    enum TYPE_WIDTH = 8;
}
else
{
    alias UInt32 UIntN;
    enum TYPE_WIDTH = 4;
}


/********************************************************************
 ** Remove definitions when INDEXED_COPY is defined.
 *******************************************************************/

//#if defined (INDEXED_COPY)
//#if defined (PRE_INC_PTRS)
//#undef PRE_INC_PTRS
//#endif /*PRE_INC_PTRS*/
//#endif /*INDEXED_COPY*/



/********************************************************************
 ** Definitions for pre and post increment of pointers.
 *******************************************************************/

version (PRE_INC_PTRS)
{
    void START_VAL(ref UInt8* x)      { x--; }
    ref T INC_VAL(T)(ref T* x)        { return *++x; }
    UInt8* CAST_TO_U8(void* p, int o) { return cast(UInt8*)p + o + TYPE_WIDTH; }
    enum WHILE_DEST_BREAK  = (TYPE_WIDTH - 1);
    enum PRE_LOOP_ADJUST   = -(TYPE_WIDTH - 1);
    enum PRE_SWITCH_ADJUST = 1;
}
else
{
    void START_VAL(UInt8* x)          { }
    ref T INC_VAL(T)(ref T* x)        { return *x++; }
    UInt8* CAST_TO_U8(void* p, int o) { return cast(UInt8*)p + o; }
    enum WHILE_DEST_BREAK  = 0;
    enum PRE_LOOP_ADJUST   = 0;
    enum PRE_SWITCH_ADJUST = 0;
}







/********************************************************************
 **
 ** void *memcpy(void *dest, const void *src, size_t count)
 **
 ** Args:     dest        - pointer to destination buffer
 **           src         - pointer to source buffer
 **           count       - number of bytes to copy
 **
 ** Return:   A pointer to destination buffer
 **
 ** Purpose:  Copies count bytes from src to dest.
 **           No overlap check is performed.
 **
 *******************************************************************/

void *memcpy(void *dest, const void *src, size_t count)
{
    auto dst8 = cast(UInt8*)dest;
    auto src8 = cast(UInt8*)src;

    UIntN* dstN;
    UIntN* srcN;
    UIntN dstWord;
    UIntN srcWord;

    /********************************************************************
     ** Macros for copying words of  different alignment.
     ** Uses incremening pointers.
     *******************************************************************/

    void CP_INCR() {
        INC_VAL(dstN) = INC_VAL(srcN);
    }

    void CP_INCR_SH(int shl, int shr) {
        version (LITTLE_ENDIAN)
        {
            dstWord   = srcWord >> shl;
            srcWord   = INC_VAL(srcN);
            dstWord  |= srcWord << shr;
            INC_VAL(dstN) = dstWord;
        }
        else
        {
            dstWord   = srcWord << shl;
            srcWord   = INC_VAL(srcN);
            dstWord  |= srcWord >> shr;
            INC_VAL(dstN) = dstWord;
        }
    }



    /********************************************************************
     ** Macros for copying words of  different alignment.
     ** Uses array indexes.
     *******************************************************************/

    void CP_INDEX(size_t idx) {
        dstN[idx] = srcN[idx];
    }

    void CP_INDEX_SH(size_t x, int shl, int shr) {
        version (LITTLE_ENDIAN)
        {
            dstWord   = srcWord >> shl;
            srcWord   = srcN[x];
            dstWord  |= srcWord << shr;
            dstN[x]  = dstWord;
        }
        else
        {
            dstWord   = srcWord << shl;
            srcWord   = srcN[x];
            dstWord  |= srcWord >> shr;
            dstN[x]  = dstWord;
        }
    }


    /********************************************************************
     ** Macros for copying words of different alignment.
     ** Uses incremening pointers or array indexes depending on
     ** configuration.
     *******************************************************************/

    version (INDEXED_COPY)
    {
        void CP(size_t idx) { CP_INDEX(idx); }
        void CP_SH(size_t idx, int shl, int shr) { CP_INDEX_SH(idx, shl, shr); }

        void INC_INDEX(T)(ref T* p, size_t o) { p += o; }
    }
    else
    {
        void CP(size_t idx) { CP_INCR(); }
        void CP_SH(size_t idx, int shl, int shr) { CP_INCR_SH(shl, shr); }

        void INC_INDEX(T)(T* p, size_t o) { }
    }

    static immutable string COPY_REMAINING = q{
        START_VAL(dst8);
        START_VAL(src8);

        switch (cnt) {
        case 7: INC_VAL(dst8) = INC_VAL(src8);
        case 6: INC_VAL(dst8) = INC_VAL(src8);
        case 5: INC_VAL(dst8) = INC_VAL(src8);
        case 4: INC_VAL(dst8) = INC_VAL(src8);
        case 3: INC_VAL(dst8) = INC_VAL(src8);
        case 2: INC_VAL(dst8) = INC_VAL(src8);
        case 1: INC_VAL(dst8) = INC_VAL(src8);
        case 0:
        default: break;
        }
    };

    static immutable string COPY_NO_SHIFT = q{
        dstN = cast(UIntN*)(dst8 + PRE_LOOP_ADJUST);
        srcN = cast(UIntN*)(src8 + PRE_LOOP_ADJUST);
        size_t length = count / TYPE_WIDTH;

        while (length & 7) {
            CP_INCR();
            length--;
        }

        length /= 8;

        while (length--) {
            CP(0);
            CP(1);
            CP(2);
            CP(3);
            CP(4);
            CP(5);
            CP(6);
            CP(7);

            INC_INDEX(dstN, 8);
            INC_INDEX(srcN, 8);
        }

        src8 = CAST_TO_U8(srcN, 0);
        dst8 = CAST_TO_U8(dstN, 0);

        { const cnt = (count & (TYPE_WIDTH - 1)); mixin(COPY_REMAINING); }
    };


    static immutable string COPY_SHIFT = q{
        dstN  = cast(UIntN*)(((cast(UIntN)dst8) + PRE_LOOP_ADJUST) &
                                 ~(TYPE_WIDTH - 1));
        srcN  = cast(UIntN*)(((cast(UIntN)src8) + PRE_LOOP_ADJUST) &
                                 ~(TYPE_WIDTH - 1));
        size_t length  = count / TYPE_WIDTH;
        srcWord = INC_VAL(srcN);

        while (length & 7) {
            CP_INCR_SH(8 * shift, 8 * (TYPE_WIDTH - shift));
            length--;
        }

        length /= 8;

        while (length--) {
            CP_SH(0, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(1, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(2, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(3, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(4, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(5, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(6, 8 * shift, 8 * (TYPE_WIDTH - shift));
            CP_SH(7, 8 * shift, 8 * (TYPE_WIDTH - shift));

            INC_INDEX(dstN, 8);
            INC_INDEX(srcN, 8);
        }

        src8 = CAST_TO_U8(srcN, (shift - TYPE_WIDTH));
        dst8 = CAST_TO_U8(dstN, 0);

        { const cnt = (count & (TYPE_WIDTH - 1)); mixin(COPY_REMAINING); }
    };


    if (count < 8) {
        const cnt = count;
        mixin(COPY_REMAINING);
        return dest;
    }

    START_VAL(dst8);
    START_VAL(src8);

    while ((cast(UIntN)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) {
        INC_VAL(dst8) = INC_VAL(src8);
        count--;
    }

    final switch (((cast(UIntN)src8) + PRE_SWITCH_ADJUST) & (TYPE_WIDTH - 1)) {
    case 0: mixin(COPY_NO_SHIFT); break;
    case 1: { const shift = 1; mixin(COPY_SHIFT); }   break;
    case 2: { const shift = 2; mixin(COPY_SHIFT); }   break;
    case 3: { const shift = 3; mixin(COPY_SHIFT); }   break;
    static if (TYPE_WIDTH >= 4)
    {
        case 4: { const shift = 4; mixin(COPY_SHIFT); }   break;
        case 5: { const shift = 5; mixin(COPY_SHIFT); }   break;
        case 6: { const shift = 6; mixin(COPY_SHIFT); }   break;
        case 7: { const shift = 7; mixin(COPY_SHIFT); }   break;
    }
    }

    return dest;
}

Reply via email to