It is common practice to use RtlCopyMemory. It is currently defined by the preprocessor as memcpy, which outperforms our implementation (see the previous benchmark commit).
This commit removes the benchmarks and our internal implementation in favor of RtlCopyMemory. RHBZ: 705785 --- xddm/build.bat | 0 xddm/display/amd64/x64.asm | 250 --------------------- xddm/display/driver.c | 7 - xddm/display/res.c | 392 ++------------------------------- xddm/display/res.h | 3 - xddm/display/sources | 4 +- xddm/tests/benchmark_format_results.py | 38 ---- xddm/tests/build_benchmark.bat | 7 - 8 files changed, 17 insertions(+), 684 deletions(-) mode change 100755 => 100644 xddm/build.bat delete mode 100644 xddm/display/amd64/x64.asm delete mode 100644 xddm/tests/benchmark_format_results.py delete mode 100644 xddm/tests/build_benchmark.bat diff --git a/xddm/build.bat b/xddm/build.bat old mode 100755 new mode 100644 diff --git a/xddm/display/amd64/x64.asm b/xddm/display/amd64/x64.asm deleted file mode 100644 index bb45d33..0000000 --- a/xddm/display/amd64/x64.asm +++ /dev/null @@ -1,250 +0,0 @@ -Extern have_sse2:DWORD - -.code - -CheckAndSetSSE2 proc - mov eax, 0000001h - cpuid - and edx, 4000000h - shr edx, 26 - mov have_sse2, edx - ret -CheckAndSetSSE2 endp - -RestoreFPU proc -; rcx PDev *pdev -; rdx size_t aligned_addr - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - movdqa xmm2, [rcx + 32] - movdqa xmm3, [rcx + 48] - ret -RestoreFPU endp - -SaveFPU proc -; rcx PDev *pdev -; rdx size_t aligned_addr - movdqa [rcx], xmm0 - movdqa [rcx + 16], xmm1 - movdqa [rcx + 32], xmm2 - movdqa [rcx + 48], xmm3 - ret -SaveFPU endp - -fast_memcpy_aligned proc -; rcx void *dest -; rdx const void *src -; r8 size_t len - ; Save rsi and rdi - mov r9, rsi - mov r10, rdi - - mov rsi, rdx - mov rdi, rcx - mov rcx, r8 - - cmp rcx, 128 - jb try_to_copy64 - - prefetchnta [rsi] - copy_128: - prefetchnta [rsi + 64] - - movdqa xmm0, [rsi] - movdqa xmm1, [rsi + 16] - movdqa xmm2, [rsi + 32] - movdqa xmm3, [rsi + 48] - - prefetchnta [rsi + 128] - - movntdq [rdi], xmm0 - movntdq [rdi + 16], xmm1 - movntdq [rdi + 32], xmm2 - movntdq [rdi + 48], xmm3 - - movdqa xmm0, [rsi + 64] - 
movdqa xmm1, [rsi + 80] - movdqa xmm2, [rsi + 96] - movdqa xmm3, [rsi + 112] - - movntdq [rdi + 64], xmm0 - movntdq [rdi + 80], xmm1 - movntdq [rdi + 96], xmm2 - movntdq [rdi + 112], xmm3 - - add rdi, 128 - add rsi, 128 - sub rcx, 128 - cmp rcx, 128 - jae copy_128 - - try_to_copy64: - cmp rcx, 64 - jb try_to_copy32 - - movdqa xmm0, [rsi] - movdqa xmm1, [rsi + 16] - movdqa xmm2, [rsi + 32] - movdqa xmm3, [rsi + 48] - - movntdq [rdi], xmm0 - movntdq [rdi + 16], xmm1 - movntdq [rdi + 32], xmm2 - movntdq [rdi + 48], xmm3 - - add rdi, 64 - add rsi, 64 - sub rcx, 64 - prefetchnta [rsi] - - try_to_copy32: - cmp rcx, 32 - jb try_to_copy16 - - movdqa xmm0, [rsi] - movdqa xmm1, [rsi + 16] - movntdq [rdi], xmm0 - movntdq [rdi + 16], xmm1 - - add rdi, 32 - add rsi, 32 - sub rcx, 32 - - try_to_copy16: - cmp rcx, 16 - jb try_to_copy4 - - movdqa xmm0, [rsi] - movntdq [rdi], xmm0 - - add rdi, 16 - add rsi, 16 - sub rcx, 16 - - - try_to_copy4: - cmp rcx, 4 - jb try_to_copy_1 - movsd - sub rcx, 4 - jmp try_to_copy4 - - try_to_copy_1: - rep movsb - - sfence - ; Save rsi and rdi - mov rsi, r9 - mov rdi, r10 - ret -fast_memcpy_aligned endp - -fast_memcpy_unaligned proc -; rcx void *dest -; rdx const void *src -; r8 size_t len - ; Save rsi and rdi - mov r9, rsi - mov r10, rdi - - mov rsi, rdx - mov rdi, rcx - mov rcx, r8 - - cmp rcx, 128 - jb try_to_copy64 - - prefetchnta [rsi] - copy_128: - prefetchnta [rsi + 64] - - movdqu xmm0, [rsi] - movdqu xmm1, [rsi + 16] - movdqu xmm2, [rsi + 32] - movdqu xmm3, [rsi + 48] - - prefetchnta [rsi + 128] - - movntdq [rdi], xmm0 - movntdq [rdi + 16], xmm1 - movntdq [rdi + 32], xmm2 - movntdq [rdi + 48], xmm3 - - movdqu xmm0, [rsi + 64] - movdqu xmm1, [rsi + 80] - movdqu xmm2, [rsi + 96] - movdqu xmm3, [rsi + 112] - - movntdq [rdi + 64], xmm0 - movntdq [rdi + 80], xmm1 - movntdq [rdi + 96], xmm2 - movntdq [rdi + 112], xmm3 - - add rdi, 128 - add rsi, 128 - sub rcx, 128 - cmp rcx, 128 - jae copy_128 - - try_to_copy64: - cmp rcx, 64 - jb try_to_copy32 - 
- movdqu xmm0, [rsi] - movdqu xmm1, [rsi + 16] - movdqu xmm2, [rsi + 32] - movdqu xmm3, [rsi + 48] - - movntdq [rdi], xmm0 - movntdq [rdi + 16], xmm1 - movntdq [rdi + 32], xmm2 - movntdq [rdi + 48], xmm3 - - add rdi, 64 - add rsi, 64 - sub rcx, 64 - prefetchnta [rsi] - - try_to_copy32: - cmp rcx, 32 - jb try_to_copy16 - - movdqu xmm0, [rsi] - movdqu xmm1, [rsi + 16] - movntdq [rdi], xmm0 - movntdq [rdi + 16], xmm1 - - add rdi, 32 - add rsi, 32 - sub rcx, 32 - - try_to_copy16: - cmp rcx, 16 - jb try_to_copy4 - - movdqu xmm0, [rsi] - movntdq [rdi], xmm0 - - add rdi, 16 - add rsi, 16 - sub rcx, 16 - - - try_to_copy4: - cmp rcx, 4 - jb try_to_copy_1 - movsd - sub rcx, 4 - jmp try_to_copy4 - - try_to_copy_1: - rep movsb - - sfence - ; restore rsi and rdi - mov rsi, r9 - mov rdi, r10 - ret -fast_memcpy_unaligned endp - -end \ No newline at end of file diff --git a/xddm/display/driver.c b/xddm/display/driver.c index bed1d58..6d196fb 100644 --- a/xddm/display/driver.c +++ b/xddm/display/driver.c @@ -246,9 +246,6 @@ BOOL DrvEnableDriver(ULONG engine_version, ULONG enable_data_size, PDRVENABLEDAT mspace_set_abort_func(mspace_abort); mspace_set_print_func(mspace_print); ResInitGlobals(); -#ifndef _WIN64 - CheckAndSetSSE2(); -#endif DEBUG_PRINT((NULL, 1, "%s: end\n", __FUNCTION__)); return TRUE; } @@ -903,8 +900,6 @@ VOID EnableQXLPrimarySurface(PDev *pdev) pdev->surf_enable = TRUE; } -void benchmark_memcpy(PDev *pdev); - HSURF DrvEnableSurface(DHPDEV in_pdev) { PDev *pdev; @@ -943,8 +938,6 @@ HSURF DrvEnableSurface(DHPDEV in_pdev) EnableQXLPrimarySurface(pdev); - benchmark_memcpy(pdev); - DEBUG_PRINT((pdev, 1, "%s: 0x%lx exit\n", __FUNCTION__, pdev)); return surf; diff --git a/xddm/display/res.c b/xddm/display/res.c index 589218b..86ed47f 100644 --- a/xddm/display/res.c +++ b/xddm/display/res.c @@ -36,11 +36,6 @@ #include "devioctl.h" #include "ntddvdeo.h" -void SaveFPU(PDev *pdev, size_t aligned_addr); -void RestoreFPU(PDev *pdev, size_t aligned_addr); -void 
fast_memcpy_unaligned(void *dest, const void *src, size_t len); -void fast_memcpy_aligned(void *dest, const void *src, size_t len); - static _inline QXLPHYSICAL PA(PDev *pdev, PVOID virt, UINT8 slot_id) { PMemSlot *p_slot = &pdev->mem_slots[slot_id]; @@ -129,12 +124,6 @@ typedef struct QXLOutput { UINT8 data[0]; } QXLOutput; -#ifndef _WIN64 -static int have_sse2 = FALSE; -#else -int have_sse2 = FALSE; -#endif - #ifndef DBG static _inline void DebugShowOutput(PDev *pdev, QXLOutput* output) { @@ -1077,285 +1066,22 @@ static BOOL SetClip(PDev *pdev, CLIPOBJ *clip, QXLDrawable *drawable) return TRUE; } -#ifndef _WIN64 - -static _inline void fast_memcpy_aligment(void *dest, const void *src, size_t len) -{ - _asm - { - mov ecx, len - mov esi, src - mov edi, dest - - cmp ecx, 128 - jb try_to_copy64 - - prefetchnta [esi] - copy_128: - prefetchnta [esi + 64] - - movdqa xmm0, [esi] - movdqa xmm1, [esi + 16] - movdqa xmm2, [esi + 32] - movdqa xmm3, [esi + 48] - - prefetchnta [esi + 128] - - movntdq [edi], xmm0 - movntdq [edi + 16], xmm1 - movntdq [edi + 32], xmm2 - movntdq [edi + 48], xmm3 - - movdqa xmm0, [esi + 64] - movdqa xmm1, [esi + 80] - movdqa xmm2, [esi + 96] - movdqa xmm3, [esi + 112] - - movntdq [edi + 64], xmm0 - movntdq [edi + 80], xmm1 - movntdq [edi + 96], xmm2 - movntdq [edi + 112], xmm3 - - add edi, 128 - add esi, 128 - sub ecx, 128 - cmp ecx, 128 - jae copy_128 - - try_to_copy64: - cmp ecx, 64 - jb try_to_copy32 - - movdqa xmm0, [esi] - movdqa xmm1, [esi + 16] - movdqa xmm2, [esi + 32] - movdqa xmm3, [esi + 48] - - movntdq [edi], xmm0 - movntdq [edi + 16], xmm1 - movntdq [edi + 32], xmm2 - movntdq [edi + 48], xmm3 - - add edi, 64 - add esi, 64 - sub ecx, 64 - prefetchnta [esi] - - try_to_copy32: - cmp ecx, 32 - jb try_to_copy16 - - movdqa xmm0, [esi] - movdqa xmm1, [esi + 16] - movntdq [edi], xmm0 - movntdq [edi + 16], xmm1 - - add edi, 32 - add esi, 32 - sub ecx, 32 - - try_to_copy16: - cmp ecx, 16 - jb try_to_copy4 - - movdqa xmm0, [esi] - movntdq [edi], 
xmm0 - - add edi, 16 - add esi, 16 - sub ecx, 16 - - - try_to_copy4: - cmp ecx, 4 - jb try_to_copy_1 - movsd - sub ecx, 4 - jmp try_to_copy4 - - try_to_copy_1: - rep movsb - - sfence - } -} - -static _inline void fast_memcpy_unaligment(void *dest, const void *src, size_t len) -{ - _asm - { - mov ecx, len - mov esi, src - mov edi, dest - - cmp ecx, 128 - jb try_to_copy64 - - prefetchnta [esi] - copy_128: - prefetchnta [esi + 64] - - movdqu xmm0, [esi] - movdqu xmm1, [esi + 16] - movdqu xmm2, [esi + 32] - movdqu xmm3, [esi + 48] - - prefetchnta [esi + 128] - - movntdq [edi], xmm0 - movntdq [edi + 16], xmm1 - movntdq [edi + 32], xmm2 - movntdq [edi + 48], xmm3 - - movdqu xmm0, [esi + 64] - movdqu xmm1, [esi + 80] - movdqu xmm2, [esi + 96] - movdqu xmm3, [esi + 112] - - movntdq [edi + 64], xmm0 - movntdq [edi + 80], xmm1 - movntdq [edi + 96], xmm2 - movntdq [edi + 112], xmm3 - - add edi, 128 - add esi, 128 - sub ecx, 128 - cmp ecx, 128 - jae copy_128 - - try_to_copy64: - cmp ecx, 64 - jb try_to_copy32 - - movdqu xmm0, [esi] - movdqu xmm1, [esi + 16] - movdqu xmm2, [esi + 32] - movdqu xmm3, [esi + 48] - - movntdq [edi], xmm0 - movntdq [edi + 16], xmm1 - movntdq [edi + 32], xmm2 - movntdq [edi + 48], xmm3 - - add edi, 64 - add esi, 64 - sub ecx, 64 - prefetchnta [esi] - - try_to_copy32: - cmp ecx, 32 - jb try_to_copy16 - - movdqu xmm0, [esi] - movdqu xmm1, [esi + 16] - movntdq [edi], xmm0 - movntdq [edi + 16], xmm1 - - add edi, 32 - add esi, 32 - sub ecx, 32 - - try_to_copy16: - cmp ecx, 16 - jb try_to_copy4 - - movdqu xmm0, [esi] - movntdq [edi], xmm0 - - add edi, 16 - add esi, 16 - sub ecx, 16 - - - try_to_copy4: - cmp ecx, 4 - jb try_to_copy_1 - movsd - sub ecx, 4 - jmp try_to_copy4 - - try_to_copy_1: - rep movsb - - sfence - } -} - -#endif - -uint64_t time_usecs(void) -{ - ENG_TIME_FIELDS systime; - EngQueryLocalTime(&systime); - return (uint64_t)(systime.usMilliseconds * 1000 + systime.usSecond * 1e6 + - systime.usMinute * 60e6 + systime.usHour * 3600e6); -} - -void 
benchmark_memcpy(PDev *pdev) -{ - size_t i; - unsigned char *src_unaligned; - unsigned char *dest_unaligned; - uint64_t start, total1, total2; - unsigned char *src = NULL; - unsigned char *dest = NULL; - size_t size = 1024; - size_t iter = 1024 * 1024; - - for (size = 1024; size < 1024*1024*2; size *= 2, iter /= 2) { - src_unaligned = EngAllocMem(0, size + 31, ALLOC_TAG); - dest_unaligned = EngAllocMem(0, size + 31, ALLOC_TAG); - src = (unsigned char *)((size_t)(src_unaligned + 31) & ~0x1f); - dest = (unsigned char *)((size_t)(dest_unaligned + 31) & ~0x1f); - - for (i = 0 ; i < size ; ++i) - src[i] = i; - - start = time_usecs(); - for (i = 0 ; i < iter ; ++i) { - fast_memcpy_aligned(dest, src, size); - } - total2 = time_usecs() - start; - - { - int errors = 0; - for (i = 0 ; i < size ; ++i) { - if (dest[i] != src[i]) { - errors++; - } - } - if (errors > 0) { - DEBUG_PRINT((pdev, 1, "!!! copy errors %d !!!\n", errors)); - } - } - - start = time_usecs(); - for (i = 0 ; i < iter ; ++i) - memcpy(dest, src, size); - total1 = time_usecs() - start; - - DEBUG_PRINT((pdev, 1, "%d: %lld, %lld\n", size, total1, total2)); - EngFreeMem(src_unaligned); - EngFreeMem(dest_unaligned); - } -} - #ifdef DBG #define PutBytesAlign __PutBytesAlign -#define PutBytes(pdev, chunk, now, end, src, size, page_counter, alloc_size, use_sse)\ - __PutBytesAlign(pdev, chunk, now, end, src, size, page_counter, alloc_size, 1, use_sse) +#define PutBytes(pdev, chunk, now, end, src, size, page_counter, alloc_size)\ + __PutBytesAlign(pdev, chunk, now, end, src, size, page_counter, alloc_size, 1) #else -#define PutBytesAlign(pdev, chunk, now, end, src, size, page_counter, alloc_size, alignment, use_sse)\ - __PutBytesAlign(pdev, chunk, now, end, src, size, NULL, alloc_size, alignment, use_sse) -#define PutBytes(pdev, chunk, now, end, src, size, page_counter, alloc_size, use_sse)\ - __PutBytesAlign(pdev, chunk, now, end, src, size, NULL, alloc_size, 1, use_sse) +#define PutBytesAlign(pdev, chunk, now, end, 
src, size, page_counter, alloc_size, alignment)\ + __PutBytesAlign(pdev, chunk, now, end, src, size, NULL, alloc_size, alignment) +#define PutBytes(pdev, chunk, now, end, src, size, page_counter, alloc_size)\ + __PutBytesAlign(pdev, chunk, now, end, src, size, NULL, alloc_size, 1) #endif #define BITS_BUF_MAX (64 * 1024) static void __PutBytesAlign(PDev *pdev, QXLDataChunk **chunk_ptr, UINT8 **now_ptr, UINT8 **end_ptr, UINT8 *src, int size, int *page_counter, - size_t alloc_size, uint32_t alignment, BOOL use_sse) + size_t alloc_size, uint32_t alignment) { QXLDataChunk *chunk = *chunk_ptr; UINT8 *now = *now_ptr; @@ -1374,34 +1100,7 @@ static void __PutBytesAlign(PDev *pdev, QXLDataChunk **chunk_ptr, UINT8 **now_pt NEW_DATA_CHUNK(page_counter, aligned_size); cp_size = (int)MIN(end - now, size); } - if (use_sse) { - offset = (size_t)now & SSE_MASK; - if (offset) { - offset = SSE_ALIGN - offset; - if (offset >= cp_size) { - RtlCopyMemory(now, src, cp_size); - src += cp_size; - now += cp_size; - chunk->data_size += cp_size; - size -= cp_size; - continue; - } - RtlCopyMemory(now, src, offset); - now += offset; - src += offset; - size -= offset; - cp_size -= offset; - chunk->data_size += offset; - } - - if (((size_t)src & SSE_MASK) == 0) { - fast_memcpy_aligment(now, src, cp_size); - } else { - fast_memcpy_unaligment(now, src, cp_size); - } - } else { - RtlCopyMemory(now, src, cp_size); - } + RtlCopyMemory(now, src, cp_size); src += cp_size; now += cp_size; chunk->data_size += cp_size; @@ -1871,36 +1570,6 @@ static void FreeBitmapImage(PDev *pdev, Resource *res) // todo: defer DEBUG_PRINT((pdev, 13, "%s: done\n", __FUNCTION__)); } -#ifndef _WIN64 - -static _inline void RestoreFPU(PDev *pdev, size_t aligned_addr) -{ - _asm - { - mov esi, aligned_addr - - movdqa xmm0, [esi] - movdqa xmm1, [esi + 16] - movdqa xmm2, [esi + 32] - movdqa xmm3, [esi + 48] - } -} - -static _inline void SaveFPU(PDev *pdev, size_t aligned_addr) -{ - _asm - { - mov edi, aligned_addr - - movdqa [edi], 
xmm0 - movdqa [edi + 16], xmm1 - movdqa [edi + 32], xmm2 - movdqa [edi + 48], xmm3 - } -} - -#endif - static void FreeSurfaceImage(PDev *pdev, Resource *res) { DEBUG_PRINT((pdev, 12, "%s\n", __FUNCTION__)); @@ -1923,9 +1592,6 @@ static _inline Resource *GetBitmapImage(PDev *pdev, SURFOBJ *surf, XLATEOBJ *col UINT8 *src_end; UINT8 *dest; UINT8 *dest_end; - UINT8 FPUSaveUnaligned[16 * 4 + 15]; - size_t FPUSave = ALIGN((size_t)(FPUSaveUnaligned), SSE_ALIGN); - BOOL use_sse = FALSE; DEBUG_PRINT((pdev, 12, "%s\n", __FUNCTION__)); ASSERT(pdev, width > 0 && height > 0); @@ -1963,16 +1629,9 @@ static _inline Resource *GetBitmapImage(PDev *pdev, SURFOBJ *surf, XLATEOBJ *col dest_end = (UINT8 *)image_res + alloc_size; alloc_size = height * line_size; - if (have_sse2 && alloc_size >= 1024) { - use_sse = TRUE; - SaveFPU(pdev, FPUSave); - } for (; src != src_end; src -= surf->lDelta, alloc_size -= line_size) { PutBytesAlign(pdev, &chunk, &dest, &dest_end, src, line_size, - &pdev->num_bits_pages, alloc_size, line_size, use_sse); - } - if (use_sse) { - RestoreFPU(pdev, FPUSave); + &pdev->num_bits_pages, alloc_size, line_size); } GetPallette(pdev, &internal->image.bitmap, color_trans); @@ -3104,7 +2763,7 @@ static BOOL GetCursorCommon(PDev *pdev, QXLCursorCmd *cmd, LONG hot_x, LONG hot_ src_end = src + (local_surf->lDelta * local_surf->sizlBitmap.cy); for (; src != src_end; src += local_surf->lDelta) { PutBytes(pdev, &info->chunk, &info->now, &info->end, src, line_size, - &pdev->num_cursor_pages, PAGE_SIZE, FALSE); + &pdev->num_cursor_pages, PAGE_SIZE); } CursorCacheAdd(pdev, internal); @@ -3223,14 +2882,14 @@ BOOL GetColorCursor(PDev *pdev, QXLCursorCmd *cmd, LONG hot_x, LONG hot_y, SURFO if (pdev->bitmap_format == BMF_32BPP) { PutBytes(pdev, &info.chunk, &info.now, &info.end, (UINT8 *)color_trans->pulXlate, - 256 << 2, &pdev->num_cursor_pages, PAGE_SIZE, FALSE); + 256 << 2, &pdev->num_cursor_pages, PAGE_SIZE); } else { int i; for (i = 0; i < 256; i++) { UINT32 ent = 
_16bppTo32bpp(color_trans->pulXlate[i]); PutBytes(pdev, &info.chunk, &info.now, &info.end, (UINT8 *)&ent, - 4, &pdev->num_cursor_pages, PAGE_SIZE, FALSE); + 4, &pdev->num_cursor_pages, PAGE_SIZE); } } info.cursor->data_size += 256 << 2; @@ -3243,14 +2902,14 @@ BOOL GetColorCursor(PDev *pdev, QXLCursorCmd *cmd, LONG hot_x, LONG hot_y, SURFO if (pdev->bitmap_format == BMF_32BPP) { PutBytes(pdev, &info.chunk, &info.now, &info.end, (UINT8 *)color_trans->pulXlate, - 16 << 2, &pdev->num_cursor_pages, PAGE_SIZE, FALSE); + 16 << 2, &pdev->num_cursor_pages, PAGE_SIZE); } else { int i; for (i = 0; i < 16; i++) { UINT32 ent = _16bppTo32bpp(color_trans->pulXlate[i]); PutBytes(pdev, &info.chunk, &info.now, &info.end, (UINT8 *)&ent, - 4, &pdev->num_cursor_pages, PAGE_SIZE, FALSE); + 4, &pdev->num_cursor_pages, PAGE_SIZE); } } info.cursor->data_size += 16 << 2; @@ -3266,7 +2925,7 @@ BOOL GetColorCursor(PDev *pdev, QXLCursorCmd *cmd, LONG hot_x, LONG hot_y, SURFO for (; src != src_end; src += mask->lDelta) { PutBytes(pdev, &info.chunk, &info.now, &info.end, src, line_size, - &pdev->num_cursor_pages, PAGE_SIZE, FALSE); + &pdev->num_cursor_pages, PAGE_SIZE); } } @@ -3403,23 +3062,4 @@ void ResDestroyGlobals() { EngDeleteSemaphore(image_id_sem); image_id_sem = NULL; -} - -#ifndef _WIN64 - -void CheckAndSetSSE2() -{ - _asm - { - mov eax, 0x0000001 - cpuid - and edx, 0x4000000 - mov have_sse2, edx - } - - if (have_sse2) { - have_sse2 = TRUE; - } -} - -#endif +} \ No newline at end of file diff --git a/xddm/display/res.h b/xddm/display/res.h index 4d179c5..0263fcf 100644 --- a/xddm/display/res.h +++ b/xddm/display/res.h @@ -67,9 +67,6 @@ BOOL ResInit(PDev *pdev); void ResDestroy(PDev *pdev); void ResInitGlobals(); void ResDestroyGlobals(); -#ifndef _WIN64 -void CheckAndSetSSE2(); -#endif void EmptyReleaseRing(PDev *pdev); void InitDeviceMemoryResources(PDev *pdev); void ReleaseCacheDeviceMemoryResources(PDev *pdev); diff --git a/xddm/display/sources b/xddm/display/sources index 
8a4f664..a6d3eaf 100644 --- a/xddm/display/sources +++ b/xddm/display/sources @@ -30,6 +30,4 @@ SOURCES=driver.c \ mspace.c \ quic.c \ surface.c \ - driver.rc - -AMD64_SOURCES=amd64\x64.asm + driver.rc \ No newline at end of file diff --git a/xddm/tests/benchmark_format_results.py b/xddm/tests/benchmark_format_results.py deleted file mode 100644 index 96d302b..0000000 --- a/xddm/tests/benchmark_format_results.py +++ /dev/null @@ -1,38 +0,0 @@ -import sys - -win7_32="""qxl/guest-0: 96463384453: qxldd: 1024: 47000, 109000 -qxl/guest-0: 96591785177: qxldd: 2048: 31000, 109000 -qxl/guest-0: 96722899152: qxldd: 4096: 16000, 109000 -qxl/guest-0: 96851422238: qxldd: 8192: 31000, 94000 -qxl/guest-0: 97013842048: qxldd: 16384: 31000, 141000 -qxl/guest-0: 97167323122: qxldd: 32768: 31000, 125000 -qxl/guest-0: 97316872306: qxldd: 65536: 31000, 109000 -qxl/guest-0: 97465747407: qxldd: 131072: 47000, 109000 -qxl/guest-0: 97624668249: qxldd: 262144: 47000, 109000 -qxl/guest-0: 97785876639: qxldd: 524288: 62000, 94000 -qxl/guest-0: 97953480643: qxldd: 1048576: 62000, 110000 -""" - -win7_64=""" -qxl/guest-0: 2278149101498: qxldd: 1024: 78000, 109000 -qxl/guest-0: 2278288271327: qxldd: 2048: 46000, 94000 -qxl/guest-0: 2278428135167: qxldd: 4096: 47000, 94000 -qxl/guest-0: 2278575078269: qxldd: 8192: 47000, 93000 -qxl/guest-0: 2278734906600: qxldd: 16384: 47000, 109000 -qxl/guest-0: 2278896881683: qxldd: 32768: 63000, 109000 -qxl/guest-0: 2279073699223: qxldd: 65536: 46000, 125000 -qxl/guest-0: 2279250403663: qxldd: 131072: 62000, 110000 -qxl/guest-0: 2279467314681: qxldd: 262144: 93000, 125000 -qxl/guest-0: 2279693375414: qxldd: 524288: 109000, 125000 -qxl/guest-0: 2279929972847: qxldd: 1048576: 109000, 125000 -""" - -filt = lambda txt: filt2(filt1(txt)) -filt2 = lambda data: [(s, system, ours*100.0/system) for t, s, system, ours in data] -filt1 = lambda txt: map(lambda a: (int(a[1][:-1]), int(a[3][:-1]), int(a[4][:-1]), int(a[5])), map(lambda l: l.strip().split(), [l for l in 
txt.split('\n') if l.strip() != ''])) -display = lambda txt: sys.stdout.write('\n'.join('%10s %10s' % (a, '%3.0f' % c) for a, b, c in filt(txt))+'\n') - -print('size [bytes]'.ljust(18) + 'our time/system time [percent]') -display(win7_32) -print('size [bytes]'.ljust(18) + 'our time/system time [percent]') -display(win7_64) diff --git a/xddm/tests/build_benchmark.bat b/xddm/tests/build_benchmark.bat deleted file mode 100644 index a184249..0000000 --- a/xddm/tests/build_benchmark.bat +++ /dev/null @@ -1,7 +0,0 @@ -cl /Zi /nologo /c /I %CRT_INC_PATH% ..\display\benchmark_memcpy.c -if defined AMD64 ( -ml64 /c /Zd ..\display\amd64\x64.asm -link /nologo /debug /libpath:%BASEDIR%\lib\crt\amd64\ /libpath:%DDK_LIB_DEST%\amd64 x64.obj benchmark_memcpy.obj -) else ( -link /nologo /debug /libpath:%BASEDIR%\lib\crt\i386\ /libpath:%DDK_LIB_DEST%\i386 benchmark_memcpy.obj -) -- 1.9.0 _______________________________________________ Spice-devel mailing list Spice-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/spice-devel