Re: [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.
On Fri, May 03, 2024 at 04:07:36AM +0800, hu heng wrote: > 于2024年4月26日周五 20:21写道: > > > > From: huheng > > > > rename old inline yuv2yuvX to yuv2yuv_X, to avoid conflicts with > > the names of standalone asm functions. When ffmpeg is compiled with > > --disable-x86asm, using the scale function will cause the video to > > be blurred. The reason is that when disable-x86asm, INLINE_MMXEXT > > is 1 and use_mmx_vfilter is 1, but c->yuv2planeX uses the c language > > version, which causes a problem of mismatch with the vfilter. This > > problem has persisted from version 4.4 to the present. Fix it by using > > inline yuv2yuv_X_mmxext, that can maintain the consistency of > > use_mmx_vfilter. > > > > reproduce the issue: > > 1. ./configure --disable-x86asm --enable-gpl --enable-libx264 > > 2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4 > > the output.mp4 is abnormal > > > > Signed-off-by: huheng > > --- > > libswscale/x86/swscale.c | 6 +++- > > libswscale/x86/swscale_template.c | 53 +++ > > 2 files changed, 58 insertions(+), 1 deletion(-) > > > > diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c > > index ff16398988..1bb9d1d51a 100644 > > --- a/libswscale/x86/swscale.c > > +++ b/libswscale/x86/swscale.c > > @@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c) > > int cpu_flags = av_get_cpu_flags(); > > > > #if HAVE_MMXEXT_INLINE > > -if (INLINE_MMXEXT(cpu_flags)) > > +if (INLINE_MMXEXT(cpu_flags)) { > > sws_init_swscale_mmxext(c); > > +if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) { > > +c->yuv2planeX = yuv2yuv_X_mmxext; > > +} > > +} > > #endif > > if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) { > > #if HAVE_MMXEXT_EXTERNAL > > diff --git a/libswscale/x86/swscale_template.c > > b/libswscale/x86/swscale_template.c > > index 6190fcb4fe..1b8794480d 100644 > > --- a/libswscale/x86/swscale_template.c > > +++ b/libswscale/x86/swscale_template.c > > @@ -33,6 +33,59 @@ > > #define MOVNTQ2 "movntq " 
> > #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) > > > > +static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize, > > + const int16_t **src, uint8_t *dest, int dstW, > > + const uint8_t *dither, int offset) > > +{ > > +filterSize--; > > +__asm__ volatile( > > +"movd %0, %%mm1\n\t" > > +"punpcklwd %%mm1, %%mm1\n\t" > > +"punpckldq %%mm1, %%mm1\n\t" > > +"psllw$3, %%mm1\n\t" > > +"paddw %%mm1, %%mm3\n\t" > > +"paddw %%mm1, %%mm4\n\t" > > +"psraw$4, %%mm3\n\t" > > +"psraw$4, %%mm4\n\t" > > +::"m"(filterSize) > > + ); > > + > > +__asm__ volatile(\ > > +"movq%%mm3, %%mm6\n\t" > > +"movq%%mm4, %%mm7\n\t" > > +"movl %3, %%ecx\n\t" > > +"mov %0, %%"FF_REG_d" \n\t"\ > > +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\ > > +".p2align 4 \n\t" > > /* FIXME Unroll? */\ > > +"1: \n\t"\ > > +"movq 8(%%"FF_REG_d"), %%mm0 \n\t" > > /* filterCoeff */\ > > +"movq(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" > > /* srcData */\ > > +"movq 8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" > > /* srcData */\ > > +"add$16, %%"FF_REG_d" \n\t"\ > > +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\ > > +"test %%"FF_REG_S", %%"FF_REG_S"\n\t"\ > > +"pmulhw %%mm0, %%mm2 \n\t"\ > > +"pmulhw %%mm0, %%mm5 \n\t"\ > > +"paddw%%mm2, %%mm3 \n\t"\ > > +"paddw%%mm5, %%mm4 \n\t"\ > > +" jnz1b \n\t"\ > > +"psraw $3, %%mm3 \n\t"\ > > +"psraw $3, %%mm4 \n\t"\ > > +"packuswb %%mm4, %%mm3 \n\t" > > +MOVNTQ2 " %%mm3, (%1, %%"FF_REG_c")\n\t" > > +"add $8, %%"FF_REG_c" \n\t"\ > > +"cmp %2, %%"FF_REG_c" \n\t"\ > > +"movq%%mm6, %%mm3\n\t" > > +"movq%%mm7, %%mm4\n\t" > > +"mov %0, %%"FF_REG_d" \n\t"\ > > +"mov(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ > > +"jb 1b \n\t"\ > > +:: "g" (filter), > > + "r"
Re: [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.
于2024年4月26日周五 20:21写道: > > From: huheng > > rename old inline yuv2yuvX to yuv2yuv_X, to avoid conflicts with > the names of standalone asm functions. When ffmpeg is compiled with > --disable-x86asm, using the scale function will cause the video to > be blurred. The reason is that when disable-x86asm, INLINE_MMXEXT > is 1 and use_mmx_vfilter is 1, but c->yuv2planeX uses the c language > version, which causes a problem of mismatch with the vfilter. This > problem has persisted from version 4.4 to the present. Fix it by using > inline yuv2yuv_X_mmxext, that can maintain the consistency of > use_mmx_vfilter. > > reproduce the issue: > 1. ./configure --disable-x86asm --enable-gpl --enable-libx264 > 2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4 > the output.mp4 is abnormal > > Signed-off-by: huheng > --- > libswscale/x86/swscale.c | 6 +++- > libswscale/x86/swscale_template.c | 53 +++ > 2 files changed, 58 insertions(+), 1 deletion(-) > > diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c > index ff16398988..1bb9d1d51a 100644 > --- a/libswscale/x86/swscale.c > +++ b/libswscale/x86/swscale.c > @@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c) > int cpu_flags = av_get_cpu_flags(); > > #if HAVE_MMXEXT_INLINE > -if (INLINE_MMXEXT(cpu_flags)) > +if (INLINE_MMXEXT(cpu_flags)) { > sws_init_swscale_mmxext(c); > +if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) { > +c->yuv2planeX = yuv2yuv_X_mmxext; > +} > +} > #endif > if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) { > #if HAVE_MMXEXT_EXTERNAL > diff --git a/libswscale/x86/swscale_template.c > b/libswscale/x86/swscale_template.c > index 6190fcb4fe..1b8794480d 100644 > --- a/libswscale/x86/swscale_template.c > +++ b/libswscale/x86/swscale_template.c > @@ -33,6 +33,59 @@ > #define MOVNTQ2 "movntq " > #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) > > +static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize, > + const int16_t **src, uint8_t *dest, int 
dstW, > + const uint8_t *dither, int offset) > +{ > +filterSize--; > +__asm__ volatile( > +"movd %0, %%mm1\n\t" > +"punpcklwd %%mm1, %%mm1\n\t" > +"punpckldq %%mm1, %%mm1\n\t" > +"psllw$3, %%mm1\n\t" > +"paddw %%mm1, %%mm3\n\t" > +"paddw %%mm1, %%mm4\n\t" > +"psraw$4, %%mm3\n\t" > +"psraw$4, %%mm4\n\t" > +::"m"(filterSize) > + ); > + > +__asm__ volatile(\ > +"movq%%mm3, %%mm6\n\t" > +"movq%%mm4, %%mm7\n\t" > +"movl %3, %%ecx\n\t" > +"mov %0, %%"FF_REG_d" \n\t"\ > +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\ > +".p2align 4 \n\t" /* > FIXME Unroll? */\ > +"1: \n\t"\ > +"movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* > filterCoeff */\ > +"movq(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* > srcData */\ > +"movq 8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* > srcData */\ > +"add$16, %%"FF_REG_d" \n\t"\ > +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\ > +"test %%"FF_REG_S", %%"FF_REG_S"\n\t"\ > +"pmulhw %%mm0, %%mm2 \n\t"\ > +"pmulhw %%mm0, %%mm5 \n\t"\ > +"paddw%%mm2, %%mm3 \n\t"\ > +"paddw%%mm5, %%mm4 \n\t"\ > +" jnz1b \n\t"\ > +"psraw $3, %%mm3 \n\t"\ > +"psraw $3, %%mm4 \n\t"\ > +"packuswb %%mm4, %%mm3 \n\t" > +MOVNTQ2 " %%mm3, (%1, %%"FF_REG_c")\n\t" > +"add $8, %%"FF_REG_c" \n\t"\ > +"cmp %2, %%"FF_REG_c" \n\t"\ > +"movq%%mm6, %%mm3\n\t" > +"movq%%mm7, %%mm4\n\t" > +"mov %0, %%"FF_REG_d" \n\t"\ > +"mov(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ > +"jb 1b \n\t"\ > +:: "g" (filter), > + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) > +: "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c > +); > +} > + > #define YSCALEYUV2PACKEDX_UV \ > __asm__ volatile(\ > "xor%%"FF_REG_a", %%"FF_REG_a" \n\t"\ > -- > 2.20.1
[FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.
From: huheng Rename the old inline yuv2yuvX to yuv2yuv_X to avoid conflicts with the names of the standalone asm functions. When ffmpeg is compiled with --disable-x86asm, using the scale function causes the video to be blurred. The reason is that with --disable-x86asm, INLINE_MMXEXT is 1 and use_mmx_vfilter is 1, but c->yuv2planeX uses the C language version, which causes a mismatch with the vfilter. This problem has persisted from version 4.4 to the present. Fix it by using the inline yuv2yuv_X_mmxext, which keeps c->yuv2planeX consistent with use_mmx_vfilter. To reproduce the issue: 1. ./configure --disable-x86asm --enable-gpl --enable-libx264 2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4 The resulting output.mp4 is abnormal. Signed-off-by: huheng --- libswscale/x86/swscale.c | 6 +++- libswscale/x86/swscale_template.c | 53 +++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index ff16398988..1bb9d1d51a 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c) int cpu_flags = av_get_cpu_flags(); #if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) +if (INLINE_MMXEXT(cpu_flags)) { sws_init_swscale_mmxext(c); +if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) { +c->yuv2planeX = yuv2yuv_X_mmxext; +} +} #endif if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) { #if HAVE_MMXEXT_EXTERNAL diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 6190fcb4fe..1b8794480d 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -33,6 +33,59 @@ #define MOVNTQ2 "movntq " #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) +static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset) +{ +filterSize--; +__asm__ volatile( +"movd %0, %%mm1\n\t" +"punpcklwd %%mm1, 
%%mm1\n\t" +"punpckldq %%mm1, %%mm1\n\t" +"psllw$3, %%mm1\n\t" +"paddw %%mm1, %%mm3\n\t" +"paddw %%mm1, %%mm4\n\t" +"psraw$4, %%mm3\n\t" +"psraw$4, %%mm4\n\t" +::"m"(filterSize) + ); + +__asm__ volatile(\ +"movq%%mm3, %%mm6\n\t" +"movq%%mm4, %%mm7\n\t" +"movl %3, %%ecx\n\t" +"mov %0, %%"FF_REG_d" \n\t"\ +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\ +".p2align 4 \n\t" /* FIXME Unroll? */\ +"1: \n\t"\ +"movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* filterCoeff */\ +"movq(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* srcData */\ +"movq 8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* srcData */\ +"add$16, %%"FF_REG_d" \n\t"\ +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\ +"test %%"FF_REG_S", %%"FF_REG_S"\n\t"\ +"pmulhw %%mm0, %%mm2 \n\t"\ +"pmulhw %%mm0, %%mm5 \n\t"\ +"paddw%%mm2, %%mm3 \n\t"\ +"paddw%%mm5, %%mm4 \n\t"\ +" jnz1b \n\t"\ +"psraw $3, %%mm3 \n\t"\ +"psraw $3, %%mm4 \n\t"\ +"packuswb %%mm4, %%mm3 \n\t" +MOVNTQ2 " %%mm3, (%1, %%"FF_REG_c")\n\t" +"add $8, %%"FF_REG_c" \n\t"\ +"cmp %2, %%"FF_REG_c" \n\t"\ +"movq%%mm6, %%mm3\n\t" +"movq%%mm7, %%mm4\n\t" +"mov %0, %%"FF_REG_d" \n\t"\ +"mov(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ +"jb 1b \n\t"\ +:: "g" (filter), + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) +: "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c +); +} + #define YSCALEYUV2PACKEDX_UV \ __asm__ volatile(\ "xor%%"FF_REG_a", %%"FF_REG_a" \n\t"\ -- 2.20.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject