This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit df7885d6c371dfb3b25862978a040eb0f2785516
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Mar 1 00:22:44 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Fri Mar 6 20:02:42 2026 +0100

    avcodec/x86/vvc/alf: Improve writing classify parameters
    
    The permutation that was applied before the write macro
    is actually only beneficial when one has 16 entries to write,
    so move it into the macro to write 16 entries and optimize
    the other macro.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vvc/alf.asm | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/libavcodec/x86/vvc/alf.asm b/libavcodec/x86/vvc/alf.asm
index f669375ed9..d27e1e0cfc 100644
--- a/libavcodec/x86/vvc/alf.asm
+++ b/libavcodec/x86/vvc/alf.asm
@@ -649,23 +649,23 @@ cglobal vvc_alf_classify_grad_%1bpc, 6, 14, 16, gradient_sum, src, src_stride, w
 
 ; SAVE_CLASSIFY_PARAM_W16(dest, src)
 %macro SAVE_CLASSIFY_PARAM_W16 2
+    vpermq                  m%2, m%2, 11011000b
     lea                   tempq, [%1q + xq]
     movu                [tempq], xm%2
-    vperm2i128              m%2, m%2, m%2, 1
+    vextracti128           xm%2, m%2, 1
     movu       [tempq + widthq], xm%2
 %endmacro
 
 ; SAVE_CLASSIFY_PARAM_W8
 %macro SAVE_CLASSIFY_PARAM_W8 2
     movq                   [%1], xm%2
-    vperm2i128              m%2, m%2, m%2, 1
-    movq          [%1 + widthq], xm%2
+    movhps        [%1 + widthq], xm%2
 %endmacro
 
 ; SAVE_CLASSIFY_PARAM_W4
 %macro SAVE_CLASSIFY_PARAM_W4 2
     movd                   [%1], xm%2
-    vperm2i128              m%2, m%2, m%2, 1
+    punpckhqdq             xm%2, xm%2
     movd          [%1 + widthq], xm%2
 %endmacro
 
@@ -676,7 +676,7 @@ cglobal vvc_alf_classify_grad_%1bpc, 6, 14, 16, gradient_sum, src, src_stride, w
     jl %%w4
     SAVE_CLASSIFY_PARAM_W8 tempq, %2
     je                   %%end
-    vpermq                 m%2, m%2, 00010011b
+    vextracti128          xm%2, m%2, 1
     add                  tempq, 8
 %%w4:
     SAVE_CLASSIFY_PARAM_W4 tempq, %2
@@ -775,7 +775,6 @@ cglobal vvc_alf_classify_grad_%1bpc, 6, 14, 16, gradient_sum, src, src_stride, w
     paddd            m11, m7, m7
     paddd            m11, m4
     paddd            m10, m11
-    vpermq           m10, m10, 11011000b
     SAVE_CLASSIFY_PARAM transpose_idx, 10
 
     psrlq            m10, m8, 32
@@ -832,7 +831,6 @@ cglobal vvc_alf_classify_grad_%1bpc, 6, 14, 16, gradient_sum, src, src_stride, w
     pandn             m1, m7
     paddd             m1, m1             ; dir1 << 1
     paddd             m6, m1             ; class_idx
-    vpermq            m6, m6, 11011000b
 
     SAVE_CLASSIFY_PARAM class_idx, 6
 %endmacro

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to