This is an automated email from the git hooks/post-receive script. A commit was pushed to branch master in repository ffmpeg.
commit 338dc256422701134457c6326f5604f528c2f381 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Wed Apr 8 20:35:06 2026 +0200 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Mon Apr 13 12:53:17 2026 +0200 avcodec/x86/snowdsp_init: Remove MMXEXT, SSE2 inner_add_yblock versions They have been superseded by SSSE3; the SSE2 version was even disabled (and segfaults if enabled). Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/snowdsp_init.c | 281 ------------------------------------------ tests/checkasm/snowdsp.c | 18 +-- 2 files changed, 1 insertion(+), 298 deletions(-) diff --git a/libavcodec/x86/snowdsp_init.c b/libavcodec/x86/snowdsp_init.c index 2a120cd0e0..18c7cfd364 100644 --- a/libavcodec/x86/snowdsp_init.c +++ b/libavcodec/x86/snowdsp_init.c @@ -612,279 +612,6 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM } #endif //HAVE_7REGS -#if HAVE_6REGS -#define snow_inner_add_yblock_sse2_header \ - x86_reg tmp;\ - __asm__ volatile(\ - "mov %7, %%"FF_REG_c" \n\t"\ - "mov %6, %2 \n\t"\ - "mov %4, %%"FF_REG_S" \n\t"\ - "pxor %%xmm7, %%xmm7 \n\t" /* 0 */\ - "pcmpeqd %%xmm3, %%xmm3 \n\t"\ - "psllw $15, %%xmm3 \n\t"\ - "psrlw $12, %%xmm3 \n\t" /* FRAC_BITS >> 1 */\ - "1: \n\t"\ - "mov %1, %%"FF_REG_D" \n\t"\ - "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\ - "add %3, %%"FF_REG_D" \n\t" - -#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\ - "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ - "movq (%%"FF_REG_d"), %%"out_reg1" \n\t"\ - "movq (%%"FF_REG_d", %%"FF_REG_c"), %%"out_reg2" \n\t"\ - "punpcklbw %%xmm7, %%"out_reg1" \n\t"\ - "punpcklbw %%xmm7, %%"out_reg2" \n\t"\ - "movq "s_offset"(%%"FF_REG_S"), %%xmm0 \n\t"\ - "movq "s_offset"+16(%%"FF_REG_S"), %%xmm4 \n\t"\ - "punpcklbw %%xmm7, %%xmm0 \n\t"\ - "punpcklbw %%xmm7, %%xmm4 \n\t"\ - "pmullw %%xmm0, %%"out_reg1" \n\t"\ - "pmullw %%xmm4, %%"out_reg2" \n\t" - -#define 
snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\ - "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ - "movq (%%"FF_REG_d"), %%"out_reg1" \n\t"\ - "movq 8(%%"FF_REG_d"), %%"out_reg2" \n\t"\ - "punpcklbw %%xmm7, %%"out_reg1" \n\t"\ - "punpcklbw %%xmm7, %%"out_reg2" \n\t"\ - "movq "s_offset"(%%"FF_REG_S"), %%xmm0 \n\t"\ - "movq "s_offset"+8(%%"FF_REG_S"), %%xmm4 \n\t"\ - "punpcklbw %%xmm7, %%xmm0 \n\t"\ - "punpcklbw %%xmm7, %%xmm4 \n\t"\ - "pmullw %%xmm0, %%"out_reg1" \n\t"\ - "pmullw %%xmm4, %%"out_reg2" \n\t" - -#define snow_inner_add_yblock_sse2_accum_8(ptr_offset, s_offset) \ - snow_inner_add_yblock_sse2_start_8("xmm2", "xmm6", ptr_offset, s_offset)\ - "paddusw %%xmm2, %%xmm1 \n\t"\ - "paddusw %%xmm6, %%xmm5 \n\t" - -#define snow_inner_add_yblock_sse2_accum_16(ptr_offset, s_offset) \ - snow_inner_add_yblock_sse2_start_16("xmm2", "xmm6", ptr_offset, s_offset)\ - "paddusw %%xmm2, %%xmm1 \n\t"\ - "paddusw %%xmm6, %%xmm5 \n\t" - -#define snow_inner_add_yblock_sse2_end_common1\ - "add $32, %%"FF_REG_S" \n\t"\ - "add %%"FF_REG_c", %0 \n\t"\ - "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\ - "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\ - "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\ - "add %%"FF_REG_c", (%%"FF_REG_a") \n\t" - -#define snow_inner_add_yblock_sse2_end_common2\ - "jnz 1b \n\t"\ - :"+m"(dst8),"+m"(lines),"=&r"(tmp)\ - :\ - "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\ - XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", )\ - "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d""); - -#define snow_inner_add_yblock_sse2_end_8\ - "sal $1, %%"FF_REG_c" \n\t"\ - "add"FF_OPSIZE" $"FF_PTR_SIZE"*2, %1 \n\t"\ - snow_inner_add_yblock_sse2_end_common1\ - "sar $1, %%"FF_REG_c" \n\t"\ - "sub $2, %2 \n\t"\ - snow_inner_add_yblock_sse2_end_common2 - -#define snow_inner_add_yblock_sse2_end_16\ - "add"FF_OPSIZE" $"FF_PTR_SIZE"*1, %1 
\n\t"\ - snow_inner_add_yblock_sse2_end_common1\ - "dec %2 \n\t"\ - snow_inner_add_yblock_sse2_end_common2 - -static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h, - int src_x, x86_reg src_stride, IDWTELEM *const *lines, int add, uint8_t * dst8) -{ -snow_inner_add_yblock_sse2_header -snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0") -snow_inner_add_yblock_sse2_accum_8("2", "8") -snow_inner_add_yblock_sse2_accum_8("1", "128") -snow_inner_add_yblock_sse2_accum_8("0", "136") - - "mov %0, %%"FF_REG_d" \n\t" - "movdqa (%%"FF_REG_D"), %%xmm0 \n\t" - "movdqa %%xmm1, %%xmm2 \n\t" - - "punpckhwd %%xmm7, %%xmm1 \n\t" - "punpcklwd %%xmm7, %%xmm2 \n\t" - "paddd %%xmm2, %%xmm0 \n\t" - "movdqa 16(%%"FF_REG_D"), %%xmm2\n\t" - "paddd %%xmm1, %%xmm2 \n\t" - "paddd %%xmm3, %%xmm0 \n\t" - "paddd %%xmm3, %%xmm2 \n\t" - - "mov %1, %%"FF_REG_D" \n\t" - "mov "FF_PTR_SIZE"(%%"FF_REG_D"), %%"FF_REG_D"; \n\t" - "add %3, %%"FF_REG_D" \n\t" - - "movdqa (%%"FF_REG_D"), %%xmm4 \n\t" - "movdqa %%xmm5, %%xmm6 \n\t" - "punpckhwd %%xmm7, %%xmm5 \n\t" - "punpcklwd %%xmm7, %%xmm6 \n\t" - "paddd %%xmm6, %%xmm4 \n\t" - "movdqa 16(%%"FF_REG_D"), %%xmm6\n\t" - "paddd %%xmm5, %%xmm6 \n\t" - "paddd %%xmm3, %%xmm4 \n\t" - "paddd %%xmm3, %%xmm6 \n\t" - - "psrad $8, %%xmm0 \n\t" /* FRAC_BITS. */ - "psrad $8, %%xmm2 \n\t" /* FRAC_BITS. */ - "packssdw %%xmm2, %%xmm0 \n\t" - "packuswb %%xmm7, %%xmm0 \n\t" - "movq %%xmm0, (%%"FF_REG_d") \n\t" - - "psrad $8, %%xmm4 \n\t" /* FRAC_BITS. */ - "psrad $8, %%xmm6 \n\t" /* FRAC_BITS. 
*/ - "packssdw %%xmm6, %%xmm4 \n\t" - "packuswb %%xmm7, %%xmm4 \n\t" - "movq %%xmm4, (%%"FF_REG_d",%%"FF_REG_c"); \n\t" -snow_inner_add_yblock_sse2_end_8 -} - -static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h, - int src_x, x86_reg src_stride, IDWTELEM *const *lines, int add, uint8_t * dst8) -{ -snow_inner_add_yblock_sse2_header -snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0") -snow_inner_add_yblock_sse2_accum_16("2", "16") -snow_inner_add_yblock_sse2_accum_16("1", "512") -snow_inner_add_yblock_sse2_accum_16("0", "528") - - "mov %0, %%"FF_REG_d" \n\t" - "psrlw $2, %%xmm1 \n\t" - "psrlw $2, %%xmm5 \n\t" - "paddw (%%"FF_REG_D"), %%xmm1 \n\t" - "paddw 16(%%"FF_REG_D"), %%xmm5 \n\t" - "paddw %%xmm3, %%xmm1 \n\t" - "paddw %%xmm3, %%xmm5 \n\t" - "psraw $4, %%xmm1 \n\t" /* FRAC_BITS. */ - "psraw $4, %%xmm5 \n\t" /* FRAC_BITS. */ - "packuswb %%xmm5, %%xmm1 \n\t" - - "movdqu %%xmm1, (%%"FF_REG_d") \n\t" - -snow_inner_add_yblock_sse2_end_16 -} - -#define snow_inner_add_yblock_mmx_header \ - x86_reg tmp;\ - __asm__ volatile(\ - "mov %7, %%"FF_REG_c" \n\t"\ - "mov %6, %2 \n\t"\ - "mov %4, %%"FF_REG_S" \n\t"\ - "pxor %%mm7, %%mm7 \n\t" /* 0 */\ - "pcmpeqd %%mm3, %%mm3 \n\t"\ - "psllw $15, %%mm3 \n\t"\ - "psrlw $12, %%mm3 \n\t" /* FRAC_BITS >> 1 */\ - "1: \n\t"\ - "mov %1, %%"FF_REG_D" \n\t"\ - "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\ - "add %3, %%"FF_REG_D" \n\t" - -#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\ - "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ - "movd "d_offset"(%%"FF_REG_d"), %%"out_reg1" \n\t"\ - "movd "d_offset"+4(%%"FF_REG_d"), %%"out_reg2" \n\t"\ - "punpcklbw %%mm7, %%"out_reg1" \n\t"\ - "punpcklbw %%mm7, %%"out_reg2" \n\t"\ - "movd "s_offset"(%%"FF_REG_S"), %%mm0 \n\t"\ - "movd "s_offset"+4(%%"FF_REG_S"), %%mm4 \n\t"\ - "punpcklbw %%mm7, %%mm0 \n\t"\ - "punpcklbw %%mm7, %%mm4 \n\t"\ - 
"pmullw %%mm0, %%"out_reg1" \n\t"\ - "pmullw %%mm4, %%"out_reg2" \n\t" - -#define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \ - snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\ - "paddusw %%mm2, %%mm1 \n\t"\ - "paddusw %%mm6, %%mm5 \n\t" - -#define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\ - "mov %0, %%"FF_REG_d" \n\t"\ - "psrlw $2, %%mm1 \n\t"\ - "psrlw $2, %%mm5 \n\t"\ - "paddw "read_offset"(%%"FF_REG_D"), %%mm1 \n\t"\ - "paddw "read_offset"+8(%%"FF_REG_D"), %%mm5 \n\t"\ - "paddw %%mm3, %%mm1 \n\t"\ - "paddw %%mm3, %%mm5 \n\t"\ - "psraw $4, %%mm1 \n\t"\ - "psraw $4, %%mm5 \n\t"\ - "packuswb %%mm5, %%mm1 \n\t"\ - "movq %%mm1, "write_offset"(%%"FF_REG_d") \n\t" - -#define snow_inner_add_yblock_mmx_end(s_step)\ - "add $"s_step", %%"FF_REG_S" \n\t"\ - "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\ - "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\ - "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\ - "add %%"FF_REG_c", (%%"FF_REG_a") \n\t"\ - "add"FF_OPSIZE " $"FF_PTR_SIZE"*1, %1 \n\t"\ - "add %%"FF_REG_c", %0 \n\t"\ - "dec %2 \n\t"\ - "jnz 1b \n\t"\ - :"+m"(dst8),"+m"(lines),"=&r"(tmp)\ - :\ - "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\ - "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d""); - -static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h, - int src_x, x86_reg src_stride, IDWTELEM *const *lines, int add, uint8_t * dst8) -{ -snow_inner_add_yblock_mmx_header -snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") -snow_inner_add_yblock_mmx_accum("2", "8", "0") -snow_inner_add_yblock_mmx_accum("1", "128", "0") -snow_inner_add_yblock_mmx_accum("0", "136", "0") -snow_inner_add_yblock_mmx_mix("0", "0") -snow_inner_add_yblock_mmx_end("16") -} - -static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * 
block, int b_w, x86_reg b_h, - int src_x, x86_reg src_stride, IDWTELEM *const *lines, int add, uint8_t * dst8) -{ -snow_inner_add_yblock_mmx_header -snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") -snow_inner_add_yblock_mmx_accum("2", "16", "0") -snow_inner_add_yblock_mmx_accum("1", "512", "0") -snow_inner_add_yblock_mmx_accum("0", "528", "0") -snow_inner_add_yblock_mmx_mix("0", "0") - -snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "8", "8") -snow_inner_add_yblock_mmx_accum("2", "24", "8") -snow_inner_add_yblock_mmx_accum("1", "520", "8") -snow_inner_add_yblock_mmx_accum("0", "536", "8") -snow_inner_add_yblock_mmx_mix("16", "8") -snow_inner_add_yblock_mmx_end("32") -} - -static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, - int src_x, int src_stride, IDWTELEM *const *lines, int add, uint8_t * dst8) -{ - if (b_w == 16) - inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x, src_stride, lines, add, dst8); - else if (b_w == 8 && obmc_stride == 16) { - if (!(b_h & 1)) - inner_add_yblock_bw_8_obmc_16_bh_even_sse2(obmc, obmc_stride, block, b_w, b_h, src_x, src_stride, lines, add, dst8); - else - inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x, src_stride, lines, add, dst8); - } else - ff_snow_inner_add_yblock_c(obmc, obmc_stride, block, b_w, b_h, src_x, src_stride, lines, add, dst8); -} - -static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, - int src_x, int src_stride, IDWTELEM *const *lines, int add, uint8_t * dst8) -{ - if (b_w == 16) - inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x, src_stride, lines, add, dst8); - else if (b_w == 8 && obmc_stride == 16) - inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x, src_stride, lines, add, dst8); - else - ff_snow_inner_add_yblock_c(obmc, obmc_stride, block, b_w, 
b_h, src_x, src_stride, lines, add, dst8); -} -#endif /* HAVE_6REGS */ - #endif /* HAVE_INLINE_ASM */ av_cold void ff_dwt_init_x86(SnowDWTContext *c) @@ -892,14 +619,10 @@ av_cold void ff_dwt_init_x86(SnowDWTContext *c) int mm_flags = av_get_cpu_flags(); #if HAVE_INLINE_ASM - if (mm_flags & AV_CPU_FLAG_MMX) { if(mm_flags & AV_CPU_FLAG_SSE2 & 0){ c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; #if HAVE_7REGS c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; -#endif -#if HAVE_6REGS - c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; #endif } else{ @@ -909,11 +632,7 @@ av_cold void ff_dwt_init_x86(SnowDWTContext *c) c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; #endif } -#if HAVE_6REGS - c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; -#endif } - } #endif /* HAVE_INLINE_ASM */ #if HAVE_SSSE3_EXTERNAL if (EXTERNAL_SSSE3(mm_flags)) { diff --git a/tests/checkasm/snowdsp.c b/tests/checkasm/snowdsp.c index 2edad643ad..99cfae679e 100644 --- a/tests/checkasm/snowdsp.c +++ b/tests/checkasm/snowdsp.c @@ -46,7 +46,7 @@ static void checkasm_check_inner_add_yblock(const SnowDWTContext *const snowdsp) LOG2_MIN_BLOCKSIZE = 1, MAX_STRIDE = 256, }; - declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *obmc, const int obmc_stride, + declare_func(void, const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_stride, IDWTELEM * const *lines, int add, uint8_t *dst8); @@ -104,8 +104,6 @@ static void checkasm_check_inner_add_yblock(const SnowDWTContext *const snowdsp) blocks[3] += (b_h - 1) * src_stride; src_stride = -src_stride; } - uint8_t *blocks_backup[4] = { blocks[0], blocks[1], - blocks[2], blocks[3] }; IDWTELEM *lines[MAX_BLOCKSIZE]; for (int k = 0; k < b_h; ++k) @@ -124,25 +122,11 @@ static void checkasm_check_inner_add_yblock(const SnowDWTContext *const snowdsp) memcpy(dst8_new, dst8_ref, sizeof(dst8_new)); call_ref(obmc, obmc_stride, blocks, b_w, b_h, src_x, src_stride, lines, 1, dst8p_ref); 
- memcpy(blocks, blocks_backup, sizeof(blocks));\ call_new(obmc, obmc_stride, blocks, b_w, b_h, src_x, src_stride, lines, 1, dst8p_new); if (memcmp(dst8_ref, dst8_new, sizeof(dst8_new))) fail(); -#undef CALL4 -#define CALL4(...)\ - do {\ - memcpy(blocks, blocks_backup, sizeof(blocks));\ - tfunc(__VA_ARGS__); \ - memcpy(blocks, blocks_backup, sizeof(blocks));\ - tfunc(__VA_ARGS__); \ - memcpy(blocks, blocks_backup, sizeof(blocks));\ - tfunc(__VA_ARGS__); \ - memcpy(blocks, blocks_backup, sizeof(blocks));\ - tfunc(__VA_ARGS__); \ - } while (0) - bench_new(obmc, obmc_stride, blocks, b_w, b_h, src_x, src_stride, lines, 1, dst8p_new); } } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
