# HG changeset patch
# User Sumalatha Polureddy <sumala...@multicorewareinc.com>
# Date 1434524988 -19800
#      Wed Jun 17 12:39:48 2015 +0530
# Node ID 8774c63a4047fa5e54679237aeb88518740efa8d
# Parent  be0ed447922cc81e809d296e75424bb71822aea7
asm: avx2 code for pixel_avg[16xN, 32xN] for 10 bpp
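For context, the equal-weight kernels wired up below compute a plain
rounded average of two prediction blocks. A minimal scalar sketch of the
operation at 10 bpp follows; the function name and the flat width/height
parameters are simplifications (x265's actual C primitive is templated
over the block size), so treat this as illustration rather than the
exact reference:

    #include <cstdint>

    typedef uint16_t pixel; // at 10 bpp, pixels occupy 16-bit words

    // dst = (src0 + src1 + 1) >> 1 per pixel, with rounding. The AVX2
    // kernels below compute the same thing 16 or 32 pixels per row at a time.
    static void pixelavg_pp_c(pixel* dst, intptr_t dstStride,
                              const pixel* src0, intptr_t srcStride0,
                              const pixel* src1, intptr_t srcStride1,
                              int width, int height)
    {
        for (int y = 0; y < height; y++)
        {
            for (int x = 0; x < width; x++)
                dst[x] = (pixel)((src0[x] + src1[x] + 1) >> 1);

            dst += dstStride;
            src0 += srcStride0;
            src1 += srcStride1;
        }
    }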
sse2:
avg_pp[32x32]  1662.52   3823.17
avg_pp[32x16]   874.92   2046.83
avg_pp[ 32x8]   479.38   1126.35
avg_pp[32x24]  1245.13   3170.44
avg_pp[32x64]  3187.47   7329.66

avx2:
avg_pp[32x32]  1584.80   3699.90
avg_pp[32x16]   820.54   2264.90
avg_pp[ 32x8]   450.56    975.42
avg_pp[32x24]  1104.99   2727.65
avg_pp[32x64]  3046.60   7310.93

diff -r be0ed447922c -r 8774c63a4047 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Jun 16 11:15:03 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Jun 17 12:39:48 2015 +0530
@@ -1568,6 +1568,18 @@
         p.pu[LUMA_64x48].sad_x4 = PFX(pixel_sad_x4_64x48_avx2);
         p.pu[LUMA_64x64].sad_x4 = PFX(pixel_sad_x4_64x64_avx2);
 
+        p.pu[LUMA_16x4].pixelavg_pp = PFX(pixel_avg_16x4_avx2);
+        p.pu[LUMA_16x8].pixelavg_pp = PFX(pixel_avg_16x8_avx2);
+        p.pu[LUMA_16x12].pixelavg_pp = PFX(pixel_avg_16x12_avx2);
+        p.pu[LUMA_16x16].pixelavg_pp = PFX(pixel_avg_16x16_avx2);
+        p.pu[LUMA_16x32].pixelavg_pp = PFX(pixel_avg_16x32_avx2);
+        p.pu[LUMA_16x64].pixelavg_pp = PFX(pixel_avg_16x64_avx2);
+        p.pu[LUMA_32x8].pixelavg_pp = PFX(pixel_avg_32x8_avx2);
+        p.pu[LUMA_32x16].pixelavg_pp = PFX(pixel_avg_32x16_avx2);
+        p.pu[LUMA_32x24].pixelavg_pp = PFX(pixel_avg_32x24_avx2);
+        p.pu[LUMA_32x32].pixelavg_pp = PFX(pixel_avg_32x32_avx2);
+        p.pu[LUMA_32x64].pixelavg_pp = PFX(pixel_avg_32x64_avx2);
+
         p.pu[LUMA_16x4].convert_p2s = PFX(filterPixelToShort_16x4_avx2);
         p.pu[LUMA_16x8].convert_p2s = PFX(filterPixelToShort_16x8_avx2);
         p.pu[LUMA_16x12].convert_p2s = PFX(filterPixelToShort_16x12_avx2);
diff -r be0ed447922c -r 8774c63a4047 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Tue Jun 16 11:15:03 2015 +0530
+++ b/source/common/x86/mc-a.asm	Wed Jun 17 12:39:48 2015 +0530
@@ -3528,6 +3528,49 @@
 AVG_WEIGHT 32, 8
 AVG_WEIGHT 48, 8
 AVG_WEIGHT 64, 8
+
+%define BIWEIGHT BIWEIGHT_SSSE3
+%define BIWEIGHT_START BIWEIGHT_START_SSSE3
+INIT_YMM avx2
+cglobal pixel_avg_weight_w16
+    BIWEIGHT_START
+    AVG_START 5
+.height_loop:
+    movu     m0, [t2]
+    movu     m1, [t4]
+    SBUTTERFLY wd, 0, 1, 2
+    pmaddwd  m0, m3
+    pmaddwd  m1, m3
+    pmulld   m0, m4
+    pmulld   m1, m4
+    packusdw m0, m1
+    mova     [t0], m0
+    AVG_END
+
+cglobal pixel_avg_weight_w32
+    BIWEIGHT_START
+    AVG_START 5
+.height_loop:
+    movu     m0, [t2]
+    movu     m1, [t4]
+    SBUTTERFLY wd, 0, 1, 2
+    pmaddwd  m0, m3
+    pmaddwd  m1, m3
+    pmulld   m0, m4
+    pmulld   m1, m4
+    packusdw m0, m1
+    mova     [t0], m0
+    movu     m0, [t2 + 32]
+    movu     m1, [t4 + 32]
+    SBUTTERFLY wd, 0, 1, 2
+    pmaddwd  m0, m3
+    pmaddwd  m1, m3
+    pmulld   m0, m4
+    pmulld   m1, m4
+    packusdw m0, m1
+    mova     [t0 + 32], m0
+    AVG_END
+
 %else ;!HIGH_BIT_DEPTH
 INIT_XMM sse2
 AVG_WEIGHT 8, 7
@@ -4096,6 +4139,22 @@
 AVG_FUNC 12, movdqu, movdqa, movq
 AVGH 12, 16
 
+INIT_XMM avx2
+AVG_FUNC 16, movdqu, movdqa
+AVGH 16, 64
+AVGH 16, 32
+AVGH 16, 16
+AVGH 16, 12
+AVGH 16, 8
+AVGH 16, 4
+
+AVG_FUNC 32, movdqu, movdqa
+AVGH 32, 64
+AVGH 32, 32
+AVGH 32, 24
+AVGH 32, 16
+AVGH 32, 8
+
 %else ;!HIGH_BIT_DEPTH
 INIT_MMX mmx2
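A note on the weighted path for reviewers unfamiliar with the BIWEIGHT
macros: pixel_avg_weight_wN handles the unequal-weight case. The sketch
below shows the conventional biweight computation such kernels implement;
the parameter names and the denominator/rounding convention are my
assumptions here, since the exact constants live in BIWEIGHT_START:

    #include <algorithm>
    #include <cstdint>

    typedef uint16_t pixel;

    // dst = clip((src0*w0 + src1*w1 + round) >> denom), with
    // w0 + w1 == 1 << denom (an assumed convention). The SBUTTERFLY/pmaddwd
    // pairing in the asm evaluates src0*w0 + src1*w1 for interleaved 16-bit
    // pixels in one multiply-accumulate, and packusdw narrows the 32-bit
    // results back to unsigned 16-bit pixels.
    static void pixelavg_weight_c(pixel* dst, intptr_t dstStride,
                                  const pixel* src0, intptr_t srcStride0,
                                  const pixel* src1, intptr_t srcStride1,
                                  int width, int height, int w0, int denom)
    {
        const int w1 = (1 << denom) - w0;
        const int rnd = 1 << (denom - 1);
        const int maxVal = (1 << 10) - 1; // 10 bpp clip range

        for (int y = 0; y < height; y++)
        {
            for (int x = 0; x < width; x++)
            {
                int v = (src0[x] * w0 + src1[x] * w1 + rnd) >> denom;
                dst[x] = (pixel)std::min(std::max(v, 0), maxVal);
            }
            dst += dstStride;
            src0 += srcStride0;
            src1 += srcStride1;
        }
    }

With equal weights (w0 == w1 == 1 << (denom - 1)) this reduces to the
rounded average sketched above, which is why the equal-weight sizes can
take the cheaper pavg-style path.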