Re: [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.

2024-05-03 Thread Michael Niedermayer
On Fri, May 03, 2024 at 04:07:36AM +0800, hu heng wrote:
>  于2024年4月26日周五 20:21写道:
> >
> > From: huheng 
> >
> > rename old inline yuv2yuvX to yuv2yuv_X, to avoid conflicts with
> > the names of standalone asm functions. When ffmpeg is compiled with
> > --disable-x86asm, using the scale function will cause the video to
> > be blurred. The reason is that when disable-x86asm, INLINE_MMXEXT
> > is 1 and use_mmx_vfilter is 1, but c->yuv2planeX uses the c language
> > version, which causes a problem of mismatch with the vfilter. This
> > problem has persisted from version 4.4 to the present. Fix it by using
> > inline yuv2yuv_X_mmxext, that can maintain the consistency of
> > use_mmx_vfilter.
> >
> > reproduce the issue:
> > 1. ./configure --disable-x86asm --enable-gpl --enable-libx264
> > 2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4
> > the output.mp4 is abnormal
> >
> > Signed-off-by: huheng 
> > ---
> >  libswscale/x86/swscale.c  |  6 +++-
> >  libswscale/x86/swscale_template.c | 53 +++
> >  2 files changed, 58 insertions(+), 1 deletion(-)
> >
> > diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> > index ff16398988..1bb9d1d51a 100644
> > --- a/libswscale/x86/swscale.c
> > +++ b/libswscale/x86/swscale.c
> > @@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
> >  int cpu_flags = av_get_cpu_flags();
> >
> >  #if HAVE_MMXEXT_INLINE
> > -if (INLINE_MMXEXT(cpu_flags))
> > +if (INLINE_MMXEXT(cpu_flags)) {
> >  sws_init_swscale_mmxext(c);
> > +if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
> > +c->yuv2planeX = yuv2yuv_X_mmxext;
> > +}
> > +}
> >  #endif
> >  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
> >  #if HAVE_MMXEXT_EXTERNAL
> > diff --git a/libswscale/x86/swscale_template.c 
> > b/libswscale/x86/swscale_template.c
> > index 6190fcb4fe..1b8794480d 100644
> > --- a/libswscale/x86/swscale_template.c
> > +++ b/libswscale/x86/swscale_template.c
> > @@ -33,6 +33,59 @@
> >  #define MOVNTQ2 "movntq "
> >  #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
> >
> > +static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize,
> > +   const int16_t **src, uint8_t *dest, int dstW,
> > +   const uint8_t *dither, int offset)
> > +{
> > +filterSize--;
> > +__asm__ volatile(
> > +"movd %0, %%mm1\n\t"
> > +"punpcklwd %%mm1, %%mm1\n\t"
> > +"punpckldq %%mm1, %%mm1\n\t"
> > +"psllw$3, %%mm1\n\t"
> > +"paddw %%mm1, %%mm3\n\t"
> > +"paddw %%mm1, %%mm4\n\t"
> > +"psraw$4, %%mm3\n\t"
> > +"psraw$4, %%mm4\n\t"
> > +::"m"(filterSize)
> > + );
> > +
> > +__asm__ volatile(\
> > +"movq%%mm3, %%mm6\n\t"
> > +"movq%%mm4, %%mm7\n\t"
> > +"movl %3, %%ecx\n\t"
> > +"mov %0, %%"FF_REG_d"   \n\t"\
> > +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\
> > +".p2align 4 \n\t" 
> > /* FIXME Unroll? */\
> > +"1: \n\t"\
> > +"movq  8(%%"FF_REG_d"), %%mm0   \n\t" 
> > /* filterCoeff */\
> > +"movq(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" 
> > /* srcData */\
> > +"movq   8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" 
> > /* srcData */\
> > +"add$16, %%"FF_REG_d"   \n\t"\
> > +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\
> > +"test %%"FF_REG_S", %%"FF_REG_S"\n\t"\
> > +"pmulhw   %%mm0, %%mm2  \n\t"\
> > +"pmulhw   %%mm0, %%mm5  \n\t"\
> > +"paddw%%mm2, %%mm3  \n\t"\
> > +"paddw%%mm5, %%mm4  \n\t"\
> > +" jnz1b \n\t"\
> > +"psraw   $3, %%mm3  \n\t"\
> > +"psraw   $3, %%mm4  \n\t"\
> > +"packuswb %%mm4, %%mm3  \n\t"
> > +MOVNTQ2 " %%mm3, (%1, %%"FF_REG_c")\n\t"
> > +"add  $8, %%"FF_REG_c"  \n\t"\
> > +"cmp  %2, %%"FF_REG_c"  \n\t"\
> > +"movq%%mm6, %%mm3\n\t"
> > +"movq%%mm7, %%mm4\n\t"
> > +"mov %0, %%"FF_REG_d" \n\t"\
> > +"mov(%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
> > +"jb  1b   \n\t"\
> > +:: "g" (filter),
> > +   "r" 

Re: [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.

2024-05-02 Thread hu heng
 于2024年4月26日周五 20:21写道:
>
> From: huheng 
>
> rename old inline yuv2yuvX to yuv2yuv_X, to avoid conflicts with
> the names of standalone asm functions. When ffmpeg is compiled with
> --disable-x86asm, using the scale function will cause the video to
> be blurred. The reason is that when disable-x86asm, INLINE_MMXEXT
> is 1 and use_mmx_vfilter is 1, but c->yuv2planeX uses the c language
> version, which causes a problem of mismatch with the vfilter. This
> problem has persisted from version 4.4 to the present. Fix it by using
> inline yuv2yuv_X_mmxext, that can maintain the consistency of
> use_mmx_vfilter.
>
> reproduce the issue:
> 1. ./configure --disable-x86asm --enable-gpl --enable-libx264
> 2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4
> the output.mp4 is abnormal
>
> Signed-off-by: huheng 
> ---
>  libswscale/x86/swscale.c  |  6 +++-
>  libswscale/x86/swscale_template.c | 53 +++
>  2 files changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> index ff16398988..1bb9d1d51a 100644
> --- a/libswscale/x86/swscale.c
> +++ b/libswscale/x86/swscale.c
> @@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
>  int cpu_flags = av_get_cpu_flags();
>
>  #if HAVE_MMXEXT_INLINE
> -if (INLINE_MMXEXT(cpu_flags))
> +if (INLINE_MMXEXT(cpu_flags)) {
>  sws_init_swscale_mmxext(c);
> +if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
> +c->yuv2planeX = yuv2yuv_X_mmxext;
> +}
> +}
>  #endif
>  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
>  #if HAVE_MMXEXT_EXTERNAL
> diff --git a/libswscale/x86/swscale_template.c 
> b/libswscale/x86/swscale_template.c
> index 6190fcb4fe..1b8794480d 100644
> --- a/libswscale/x86/swscale_template.c
> +++ b/libswscale/x86/swscale_template.c
> @@ -33,6 +33,59 @@
>  #define MOVNTQ2 "movntq "
>  #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
>
> +static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize,
> +   const int16_t **src, uint8_t *dest, int dstW,
> +   const uint8_t *dither, int offset)
> +{
> +filterSize--;
> +__asm__ volatile(
> +"movd %0, %%mm1\n\t"
> +"punpcklwd %%mm1, %%mm1\n\t"
> +"punpckldq %%mm1, %%mm1\n\t"
> +"psllw$3, %%mm1\n\t"
> +"paddw %%mm1, %%mm3\n\t"
> +"paddw %%mm1, %%mm4\n\t"
> +"psraw$4, %%mm3\n\t"
> +"psraw$4, %%mm4\n\t"
> +::"m"(filterSize)
> + );
> +
> +__asm__ volatile(\
> +"movq%%mm3, %%mm6\n\t"
> +"movq%%mm4, %%mm7\n\t"
> +"movl %3, %%ecx\n\t"
> +"mov %0, %%"FF_REG_d"   \n\t"\
> +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\
> +".p2align 4 \n\t" /* 
> FIXME Unroll? */\
> +"1: \n\t"\
> +"movq  8(%%"FF_REG_d"), %%mm0   \n\t" /* 
> filterCoeff */\
> +"movq(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* 
> srcData */\
> +"movq   8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* 
> srcData */\
> +"add$16, %%"FF_REG_d"   \n\t"\
> +"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\
> +"test %%"FF_REG_S", %%"FF_REG_S"\n\t"\
> +"pmulhw   %%mm0, %%mm2  \n\t"\
> +"pmulhw   %%mm0, %%mm5  \n\t"\
> +"paddw%%mm2, %%mm3  \n\t"\
> +"paddw%%mm5, %%mm4  \n\t"\
> +" jnz1b \n\t"\
> +"psraw   $3, %%mm3  \n\t"\
> +"psraw   $3, %%mm4  \n\t"\
> +"packuswb %%mm4, %%mm3  \n\t"
> +MOVNTQ2 " %%mm3, (%1, %%"FF_REG_c")\n\t"
> +"add  $8, %%"FF_REG_c"  \n\t"\
> +"cmp  %2, %%"FF_REG_c"  \n\t"\
> +"movq%%mm6, %%mm3\n\t"
> +"movq%%mm7, %%mm4\n\t"
> +"mov %0, %%"FF_REG_d" \n\t"\
> +"mov(%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
> +"jb  1b   \n\t"\
> +:: "g" (filter),
> +   "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
> +: "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
> +);
> +}
> +
>  #define YSCALEYUV2PACKEDX_UV \
>  __asm__ volatile(\
>  "xor%%"FF_REG_a", %%"FF_REG_a"  \n\t"\
> --
> 2.20.1

[FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.

2024-04-26 Thread heng . hu . 1989
From: huheng 

Rename the old inline yuv2yuvX to yuv2yuv_X to avoid conflicts with
the names of the standalone asm functions. When ffmpeg is compiled with
--disable-x86asm, scaling produces blurred video. The reason is that
with --disable-x86asm, INLINE_MMXEXT is 1 and use_mmx_vfilter is 1, but
c->yuv2planeX still uses the C version, which does not match the
vfilter. This problem has persisted from version 4.4 to the present.
Fix it by using the inline yuv2yuv_X_mmxext, which keeps c->yuv2planeX
consistent with use_mmx_vfilter.

To reproduce the issue:
1. ./configure --disable-x86asm --enable-gpl --enable-libx264
2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4
The resulting output.mp4 is abnormal (blurred).

Signed-off-by: huheng 
---
 libswscale/x86/swscale.c  |  6 +++-
 libswscale/x86/swscale_template.c | 53 +++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index ff16398988..1bb9d1d51a 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags))
+if (INLINE_MMXEXT(cpu_flags)) {
 sws_init_swscale_mmxext(c);
+if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
+c->yuv2planeX = yuv2yuv_X_mmxext;
+}
+}
 #endif
 if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
 #if HAVE_MMXEXT_EXTERNAL
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 6190fcb4fe..1b8794480d 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -33,6 +33,59 @@
 #define MOVNTQ2 "movntq "
 #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
 
+static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize,
+   const int16_t **src, uint8_t *dest, int dstW,
+   const uint8_t *dither, int offset)
+{
+filterSize--;
+__asm__ volatile(
+"movd %0, %%mm1\n\t"
+"punpcklwd %%mm1, %%mm1\n\t"
+"punpckldq %%mm1, %%mm1\n\t"
+"psllw$3, %%mm1\n\t"
+"paddw %%mm1, %%mm3\n\t"
+"paddw %%mm1, %%mm4\n\t"
+"psraw$4, %%mm3\n\t"
+"psraw$4, %%mm4\n\t"
+::"m"(filterSize)
+ );
+
+__asm__ volatile(\
+"movq%%mm3, %%mm6\n\t"
+"movq%%mm4, %%mm7\n\t"
+"movl %3, %%ecx\n\t"
+"mov %0, %%"FF_REG_d"   \n\t"\
+"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\
+".p2align 4 \n\t" /* 
FIXME Unroll? */\
+"1: \n\t"\
+"movq  8(%%"FF_REG_d"), %%mm0   \n\t" /* 
filterCoeff */\
+"movq(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* 
srcData */\
+"movq   8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* 
srcData */\
+"add$16, %%"FF_REG_d"   \n\t"\
+"mov(%%"FF_REG_d"), %%"FF_REG_S"\n\t"\
+"test %%"FF_REG_S", %%"FF_REG_S"\n\t"\
+"pmulhw   %%mm0, %%mm2  \n\t"\
+"pmulhw   %%mm0, %%mm5  \n\t"\
+"paddw%%mm2, %%mm3  \n\t"\
+"paddw%%mm5, %%mm4  \n\t"\
+" jnz1b \n\t"\
+"psraw   $3, %%mm3  \n\t"\
+"psraw   $3, %%mm4  \n\t"\
+"packuswb %%mm4, %%mm3  \n\t"
+MOVNTQ2 " %%mm3, (%1, %%"FF_REG_c")\n\t"
+"add  $8, %%"FF_REG_c"  \n\t"\
+"cmp  %2, %%"FF_REG_c"  \n\t"\
+"movq%%mm6, %%mm3\n\t"
+"movq%%mm7, %%mm4\n\t"
+"mov %0, %%"FF_REG_d" \n\t"\
+"mov(%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
+"jb  1b   \n\t"\
+:: "g" (filter),
+   "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
+: "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
+);
+}
+
 #define YSCALEYUV2PACKEDX_UV \
 __asm__ volatile(\
 "xor%%"FF_REG_a", %%"FF_REG_a"  \n\t"\
-- 
2.20.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject