On 12/29/2011 3:19 AM, Vladimir Panteleev wrote:
I'd like to invite you to translate Daniel Vik's C memcpy implementation to D: http://www.danielvik.com/2010/02/fast-memcpy-in-c.html
Challenge accepted. ------------------------ /******************************************************************** ** File: memcpy.c ** ** Copyright (C) 1999-2010 Daniel Vik ** ** This software is provided 'as-is', without any express or implied ** warranty. In no event will the authors be held liable for any ** damages arising from the use of this software. ** Permission is granted to anyone to use this software for any ** purpose, including commercial applications, and to alter it and ** redistribute it freely, subject to the following restrictions: ** ** 1. The origin of this software must not be misrepresented; you ** must not claim that you wrote the original software. If you ** use this software in a product, an acknowledgment in the ** use this software in a product, an acknowledgment in the ** product documentation would be appreciated but is not ** required. ** ** 2. Altered source versions must be plainly marked as such, and ** must not be misrepresented as being the original software. ** ** 3. This notice may not be removed or altered from any source ** distribution. ** ** ** Description: Implementation of the standard library function memcpy. ** This implementation of memcpy() is ANSI-C89 compatible. ** ** The following configuration options can be set: ** ** LITTLE_ENDIAN - Uses processor with little endian ** addressing. Default is big endian. ** ** PRE_INC_PTRS - Use pre increment of pointers. ** Default is post increment of ** pointers. ** ** INDEXED_COPY - Copying data using array indexing. ** Using this option, disables the ** PRE_INC_PTRS option. ** ** MEMCPY_64BIT - Compiles memcpy for 64 bit ** architectures ** ** ** Best Settings: ** ** Intel x86: LITTLE_ENDIAN and INDEXED_COPY ** *******************************************************************/ module memcpy; /******************************************************************** ** Configuration definitions. *******************************************************************/ version = LITTLE_ENDIAN; version = INDEXED_COPY; /******************************************************************** ** Includes for size_t definition *******************************************************************/ /******************************************************************** ** Typedefs *******************************************************************/ alias ubyte UInt8; alias ushort UInt16; alias uint UInt32; alias ulong UInt64; version (D_LP64) { alias UInt64 UIntN; enum TYPE_WIDTH = 8; } else { alias UInt32 UIntN; enum TYPE_WIDTH = 4; } /******************************************************************** ** Remove definitions when INDEXED_COPY is defined. *******************************************************************/ //#if defined (INDEXED_COPY) //#if defined (PRE_INC_PTRS) //#undef PRE_INC_PTRS //#endif /*PRE_INC_PTRS*/ //#endif /*INDEXED_COPY*/ /******************************************************************** ** Definitions for pre and post increment of pointers. *******************************************************************/ version (PRE_INC_PTRS) { void START_VAL(ref UInt8* x) { x--; } ref T INC_VAL(T)(ref T* x) { return *++x; } UInt8* CAST_TO_U8(void* p, int o) { return cast(UInt8*)p + o + TYPE_WIDTH; } enum WHILE_DEST_BREAK = (TYPE_WIDTH - 1); enum PRE_LOOP_ADJUST = -(TYPE_WIDTH - 1); enum PRE_SWITCH_ADJUST = 1; } else { void START_VAL(UInt8* x) { } ref T INC_VAL(T)(ref T* x) { return *x++; } UInt8* CAST_TO_U8(void* p, int o) { return cast(UInt8*)p + o; } enum WHILE_DEST_BREAK = 0; enum PRE_LOOP_ADJUST = 0; enum PRE_SWITCH_ADJUST = 0; } /******************************************************************** ** ** void *memcpy(void *dest, const void *src, size_t count) ** ** Args: dest - pointer to destination buffer ** src - pointer to source buffer ** count - number of bytes to copy ** ** Return: A pointer to destination buffer ** ** Purpose: Copies count bytes from src to dest. ** No overlap check is performed. ** *******************************************************************/ void *memcpy(void *dest, const void *src, size_t count) { auto dst8 = cast(UInt8*)dest; auto src8 = cast(UInt8*)src; UIntN* dstN; UIntN* srcN; UIntN dstWord; UIntN srcWord; /******************************************************************** ** Macros for copying words of different alignment. ** Uses incremening pointers. *******************************************************************/ void CP_INCR() { INC_VAL(dstN) = INC_VAL(srcN); } void CP_INCR_SH(int shl, int shr) { version (LITTLE_ENDIAN) { dstWord = srcWord >> shl; srcWord = INC_VAL(srcN); dstWord |= srcWord << shr; INC_VAL(dstN) = dstWord; } else { dstWord = srcWord << shl; srcWord = INC_VAL(srcN); dstWord |= srcWord >> shr; INC_VAL(dstN) = dstWord; } } /******************************************************************** ** Macros for copying words of different alignment. ** Uses array indexes. *******************************************************************/ void CP_INDEX(size_t idx) { dstN[idx] = srcN[idx]; } void CP_INDEX_SH(size_t x, int shl, int shr) { version (LITTLE_ENDIAN) { dstWord = srcWord >> shl; srcWord = srcN[x]; dstWord |= srcWord << shr; dstN[x] = dstWord; } else { dstWord = srcWord << shl; srcWord = srcN[x]; dstWord |= srcWord >> shr; dstN[x] = dstWord; } } /******************************************************************** ** Macros for copying words of different alignment. ** Uses incremening pointers or array indexes depending on ** configuration. *******************************************************************/ version (INDEXED_COPY) { void CP(size_t idx) { CP_INDEX(idx); } void CP_SH(size_t idx, int shl, int shr) { CP_INDEX_SH(idx, shl, shr); } void INC_INDEX(T)(ref T* p, size_t o) { p += o; } } else { void CP(size_t idx) { CP_INCR(); } void CP_SH(size_t idx, int shl, int shr) { CP_INCR_SH(shl, shr); } void INC_INDEX(T)(T* p, size_t o) { } } void COPY_REMAINING(size_t count) { START_VAL(dst8); START_VAL(src8); switch (count) { case 7: INC_VAL(dst8) = INC_VAL(src8); case 6: INC_VAL(dst8) = INC_VAL(src8); case 5: INC_VAL(dst8) = INC_VAL(src8); case 4: INC_VAL(dst8) = INC_VAL(src8); case 3: INC_VAL(dst8) = INC_VAL(src8); case 2: INC_VAL(dst8) = INC_VAL(src8); case 1: INC_VAL(dst8) = INC_VAL(src8); case 0: default: break; } } void COPY_NO_SHIFT() { dstN = cast(UIntN*)(dst8 + PRE_LOOP_ADJUST); srcN = cast(UIntN*)(src8 + PRE_LOOP_ADJUST); size_t length = count / TYPE_WIDTH; while (length & 7) { CP_INCR(); length--; } length /= 8; while (length--) { CP(0); CP(1); CP(2); CP(3); CP(4); CP(5); CP(6); CP(7); INC_INDEX(dstN, 8); INC_INDEX(srcN, 8); } src8 = CAST_TO_U8(srcN, 0); dst8 = CAST_TO_U8(dstN, 0); COPY_REMAINING(count & (TYPE_WIDTH - 1)); } void COPY_SHIFT(int shift) { dstN = cast(UIntN*)(((cast(UIntN)dst8) + PRE_LOOP_ADJUST) & ~(TYPE_WIDTH - 1)); srcN = cast(UIntN*)(((cast(UIntN)src8) + PRE_LOOP_ADJUST) & ~(TYPE_WIDTH - 1)); size_t length = count / TYPE_WIDTH; srcWord = INC_VAL(srcN); while (length & 7) { CP_INCR_SH(8 * shift, 8 * (TYPE_WIDTH - shift)); length--; } length /= 8; while (length--) { CP_SH(0, 8 * shift, 8 * (TYPE_WIDTH - shift)); CP_SH(1, 8 * shift, 8 * (TYPE_WIDTH - shift)); CP_SH(2, 8 * shift, 8 * (TYPE_WIDTH - shift)); CP_SH(3, 8 * shift, 8 * (TYPE_WIDTH - shift)); CP_SH(4, 8 * shift, 8 * (TYPE_WIDTH - shift)); CP_SH(5, 8 * shift, 8 * (TYPE_WIDTH - shift)); CP_SH(6, 8 * shift, 8 * (TYPE_WIDTH - shift)); CP_SH(7, 8 * shift, 8 * (TYPE_WIDTH - shift)); INC_INDEX(dstN, 8); INC_INDEX(srcN, 8); } src8 = CAST_TO_U8(srcN, (shift - TYPE_WIDTH)); dst8 = CAST_TO_U8(dstN, 0); COPY_REMAINING(count & (TYPE_WIDTH - 1)); } if (count < 8) { COPY_REMAINING(count); return dest; } START_VAL(dst8); START_VAL(src8); while ((cast(UIntN)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) { INC_VAL(dst8) = INC_VAL(src8); count--; } final switch (((cast(UIntN)src8) + PRE_SWITCH_ADJUST) & (TYPE_WIDTH - 1)) { case 0: COPY_NO_SHIFT(); break; case 1: COPY_SHIFT(1); break; case 2: COPY_SHIFT(2); break; case 3: COPY_SHIFT(3); break; static if (TYPE_WIDTH >= 4) { case 4: COPY_SHIFT(4); break; case 5: COPY_SHIFT(5); break; case 6: COPY_SHIFT(6); break; case 7: COPY_SHIFT(7); break; } } return dest; }