This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 7971953d299e71ca3ab5d95f452dddfb2f061ad7
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun May 10 22:01:15 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Fri May 15 20:29:29 2026 +0200

    avfilter/x86/vf_pp7: Port ff_pp7_dctB_mmx to SSE2
    
    Unfortunately, this is a bit slower than the MMX version because
    paddw cannot be used with memory operands here.
    The situation would be reversed if ff_pp7_dctB_mmx() had to
    issue emms.
    
    dctB_c:                                                  3.7 ( 1.00x)
    dctB_mmx:                                                3.3 ( 1.13x)
    dctB_sse2:                                               3.6 ( 1.03x)
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavfilter/vf_pp7.c          |  2 --
 libavfilter/x86/vf_pp7.asm    | 55 ++++++++++++++++++++-----------------------
 libavfilter/x86/vf_pp7_init.c |  6 ++---
 tests/checkasm/vf_pp7.c       |  2 +-
 4 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/libavfilter/vf_pp7.c b/libavfilter/vf_pp7.c
index d8a5501b47..10f56c804f 100644
--- a/libavfilter/vf_pp7.c
+++ b/libavfilter/vf_pp7.c
@@ -27,7 +27,6 @@
  * project, and ported by Arwa Arif for FFmpeg.
  */
 
-#include "libavutil/emms.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
@@ -351,7 +350,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                    cw,        ch,        qp_table, qp_stride, 0);
             filter(pp7, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
                    cw,        ch,        qp_table, qp_stride, 0);
-            emms_c();
         }
     }
 
diff --git a/libavfilter/x86/vf_pp7.asm b/libavfilter/x86/vf_pp7.asm
index 9dfabdcc8d..1a0921ed50 100644
--- a/libavfilter/x86/vf_pp7.asm
+++ b/libavfilter/x86/vf_pp7.asm
@@ -24,34 +24,31 @@
 
 SECTION .text
 
-INIT_MMX mmx
+INIT_XMM sse2
+;void ff_pp7_dctB_sse2(int16_t *dst, const int16_t *src)
+cglobal pp7_dctB, 2, 2, 6, dst, src
+    movq         m0, [srcq+8*0]
+    movq         m5, [srcq+8*6]
+    movq         m3, [srcq+8*3]
+    movq         m1, [srcq+8*1]
+    movq         m4, [srcq+8*5]
+    movq         m2, [srcq+8*2]
+    paddw        m0, m5
+    movq         m5, [srcq+8*4]
+    paddw        m3, m3
+    paddw        m1, m4
+    paddw        m2, m5
 
-;void ff_pp7_dctB_mmx(int16_t *dst, const int16_t *src)
-cglobal pp7_dctB, 2, 2, 0, dst, src
-    movq   m0, [srcq]
-    movq   m1, [srcq+mmsize*1]
-    paddw  m0, [srcq+mmsize*6]
-    paddw  m1, [srcq+mmsize*5]
-    movq   m2, [srcq+mmsize*2]
-    movq   m3, [srcq+mmsize*3]
-    paddw  m2, [srcq+mmsize*4]
-    paddw  m3, m3
-    movq   m4, m3
-    psubw  m3, m0
-    paddw  m4, m0
-    movq   m0, m2
-    psubw  m2, m1
-    paddw  m0, m1
-    movq   m1, m4
-    psubw  m4, m0
-    paddw  m1, m0
-    movq   m0, m3
-    psubw  m3, m2
-    psubw  m3, m2
-    paddw  m2, m0
-    paddw  m2, m0
-    movq   [dstq], m1
-    movq   [dstq+mmsize*2], m4
-    movq   [dstq+mmsize*1], m2
-    movq   [dstq+mmsize*3], m3
+    SUMSUB_BA     w, 0, 3, 4
+    SUMSUB_BA     w, 1, 2, 5
+
+    SUMSUB_BA     w, 1, 0, 4
+    movq     [dstq], m1
+    paddw        m4, m2, m3
+    paddw        m2, m2
+    movq [dstq+8*2], m0
+    paddw        m4, m3
+    psubw        m3, m2
+    movq [dstq+8*1], m4
+    movq [dstq+8*3], m3
     RET
diff --git a/libavfilter/x86/vf_pp7_init.c b/libavfilter/x86/vf_pp7_init.c
index f294ca7764..725326382b 100644
--- a/libavfilter/x86/vf_pp7_init.c
+++ b/libavfilter/x86/vf_pp7_init.c
@@ -23,12 +23,12 @@
 #include "libavutil/x86/cpu.h"
 #include "libavfilter/vf_pp7dsp.h"
 
-void ff_pp7_dctB_mmx(int16_t *restrict dst, const int16_t *restrict src);
+void ff_pp7_dctB_sse2(int16_t *restrict dst, const int16_t *restrict src);
 
 av_cold void ff_pp7dsp_init_x86(PP7DSPContext *p)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_MMX(cpu_flags))
-        p->dctB = ff_pp7_dctB_mmx;
+    if (EXTERNAL_SSE2(cpu_flags))
+        p->dctB = ff_pp7_dctB_sse2;
 }
diff --git a/tests/checkasm/vf_pp7.c b/tests/checkasm/vf_pp7.c
index 07664f7472..e506eeb16c 100644
--- a/tests/checkasm/vf_pp7.c
+++ b/tests/checkasm/vf_pp7.c
@@ -35,7 +35,7 @@
 
 static void check_dctB(const PP7DSPContext *const pp7dsp)
 {
-    declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *dst, const int16_t *src);
+    declare_func(void, int16_t *dst, const int16_t *src);
 
     if (!check_func(pp7dsp->dctB, "dctB"))
         return;

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to