# HG changeset patch
# User Praveen Tiwari <prav...@multicorewareinc.com>
# Date 1493905428 -19800
#      Thu May 04 19:13:48 2017 +0530
# Node ID 41611825c2f4661536500e1306db7d8c4bf7fd07
# Parent  48502979a4b21f6982dcdacbf7796bf5d9fb395c
avx2: 'integral4v' asm code -> 7.48x faster than 'C' version
integral_init4v 7.48x 202.53 1515.14

diff -r 48502979a4b2 -r 41611825c2f4 source/common/x86/seaintegral.asm
--- a/source/common/x86/seaintegral.asm Wed May 03 11:26:26 2017 +0530
+++ b/source/common/x86/seaintegral.asm Thu May 04 19:13:48 2017 +0530
@@ -32,8 +32,37 @@
 ;void integral_init4v_c(uint32_t *sum4, intptr_t stride)
 ;-----------------------------------------------------------------------------
 INIT_YMM avx2
-cglobal integral4v, 2, 2, 0
-
+cglobal integral4v, 2, 4, 2
+    ; r0 = sum4, r1 = stride (counted in uint32_t elements)
+    ; Computes sum4[x] = sum4[x + 4 * stride] - sum4[x] for 0 <= x < stride.
+    ; Any stride is accepted: values <= 0 do nothing, and a stride that is not
+    ; a multiple of 8 is finished by the scalar loop instead of overrunning.
+    mov     r2, r1
+    shl     r2, 4                   ; r2 = 4 * stride * 4 bytes: offset of sum4[x + 4 * stride]
+    sub     r1, 8                   ; r1 = elements left after the next 8-wide step
+    jl      .tail                   ; fewer than 8 elements: skip the vector loop
+
+.loop:                              ; 8 dwords (one ymm register) per iteration
+    movu    m0, [r0]
+    movu    m1, [r0 + r2]
+    psubd   m1, m0                  ; m1 = sum4[x + 4 * stride] - sum4[x]
+    movu    [r0], m1
+    add     r0, 32
+    sub     r1, 8
+    jge     .loop                   ; flags come from sub; no separate cmp needed
+
+.tail:
+    add     r1, 8                   ; undo the bias: r1 = elements still to process
+    jle     .end                    ; nothing left (or stride was <= 0)
+
+.tail_loop:                         ; scalar clean-up, one dword per iteration
+    mov     r3d, [r0 + r2]
+    sub     r3d, [r0]
+    mov     [r0], r3d
+    add     r0, 4
+    dec     r1
+    jnz     .tail_loop
+
+.end:
     RET
 
 ;-----------------------------------------------------------------------------
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel