This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new 34dfa8bf2b avcodec/x86/h264_intrapred: SSE2 impl. of pred8x8l_top_dc_8
34dfa8bf2b is described below
commit 34dfa8bf2b86041c2b836629ec815f630eb38f33
Author: Zuxy Meng <[email protected]>
AuthorDate: Tue Apr 21 20:43:03 2026 -0700
Commit: kierank <[email protected]>
CommitDate: Mon May 25 07:05:09 2026 +0000
avcodec/x86/h264_intrapred: SSE2 impl. of pred8x8l_top_dc_8
Deprecate MMX with less instruction count. Remove the SSSE3 impl. as we
no longer use palignr.
pred8x8l_top_dc_8_mmxext: 8.7 ( 2.29x)
pred8x8l_top_dc_8_ssse3: 7.9 ( 2.51x)
pred8x8l_top_dc_8_sse2: 6.7 ( 3.01x)
Signed-off-by: Zuxy Meng <[email protected]>
---
libavcodec/x86/h264_intrapred.asm | 74 +++++++++++++++---------------------
libavcodec/x86/h264_intrapred_init.c | 6 +--
2 files changed, 32 insertions(+), 48 deletions(-)
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 6cf01a228a..1cc0dd7f8f 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -805,57 +805,43 @@ cglobal pred8x8_tm_vp8_8, 2,3,6
; void ff_pred8x8l_top_dc_8(uint8_t *src, int has_topleft, int has_topright,
; ptrdiff_t stride)
;-----------------------------------------------------------------------------
-%macro PRED8x8L_TOP_DC 0
-cglobal pred8x8l_top_dc_8, 4,4
+INIT_XMM sse2
+cglobal pred8x8l_top_dc_8, 4,4,6
sub r0, r3
- pxor mm7, mm7
- movq mm0, [r0-8]
- movq mm3, [r0]
- movq mm1, [r0+8]
- movq mm2, mm3
- movq mm4, mm3
- PALIGNR mm2, mm0, 7, mm0
- PALIGNR mm1, mm4, 1, mm4
+ movu m2, [r0-8]
+ movu m3, [r0]
+ mova m1, m3
+ psrldq m2, 7
+ psrldq m1, 1
test r1d, r1d ; top_left
- jz .fix_lt_2
+ jnz .has_topleft
+ pxor m5, m3, m2
+ psllq m5, 56
+ psrlq m5, 56
+ pxor m2, m5
+.has_topleft:
test r2d, r2d ; top_right
- jz .fix_tr_1
- jmp .body
-.fix_lt_2:
- movq mm5, mm3
- pxor mm5, mm2
- psllq mm5, 56
- psrlq mm5, 56
- pxor mm2, mm5
- test r2d, r2d ; top_right
- jnz .body
-.fix_tr_1:
- movq mm5, mm3
- pxor mm5, mm1
- psrlq mm5, 56
- psllq mm5, 56
- pxor mm1, mm5
-.body:
- PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5
- psadbw mm7, mm0
- paddw mm7, [pw_4]
- psrlw mm7, 3
- pshufw mm7, mm7, 0
- packuswb mm7, mm7
+ jnz .has_topright
+ pxor m5, m3, m1
+ psrlq m5, 56
+ psllq m5, 56
+ pxor m1, m5
+.has_topright:
+ pxor m4, m4
+ PRED4x4_LOWPASS m0, m2, m1, m3, m5
+ psadbw m4, m0
+ paddw m4, [pw_4]
+ psrlw m4, 3
+ SPLATW m4, m4, 0
+ packuswb m4, m4
%rep 3
- movq [r0+r3*1], mm7
- movq [r0+r3*2], mm7
+ movq [r0+r3*1], m4
+ movq [r0+r3*2], m4
lea r0, [r0+r3*2]
%endrep
- movq [r0+r3*1], mm7
- movq [r0+r3*2], mm7
+ movq [r0+r3*1], m4
+ movq [r0+r3*2], m4
RET
-%endmacro
-
-INIT_MMX mmxext
-PRED8x8L_TOP_DC
-INIT_MMX ssse3
-PRED8x8L_TOP_DC
;-----------------------------------------------------------------------------
; void ff_pred8x8l_dc_8(uint8_t *src, int has_topleft, int has_topright,
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 697ce1af08..5b308f658f 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -124,8 +124,7 @@ PRED8x8(plane, 8, ssse3)
PRED8x8(tm_vp8, 8, sse2)
PRED8x8(tm_vp8, 8, ssse3)
-PRED8x8L(top_dc, 8, mmxext)
-PRED8x8L(top_dc, 8, ssse3)
+PRED8x8L(top_dc, 8, sse2)
PRED8x8L(dc, 8, mmxext)
PRED8x8L(dc, 8, ssse3)
PRED8x8L(horizontal, 8, mmxext)
@@ -164,7 +163,6 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
if (bit_depth == 8) {
if (EXTERNAL_MMXEXT(cpu_flags)) {
- h->pred8x8l [TOP_DC_PRED ] =
ff_pred8x8l_top_dc_8_mmxext;
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_mmxext;
h->pred8x8l [HOR_PRED ] =
ff_pred8x8l_horizontal_8_mmxext;
h->pred8x8l [VERT_PRED ] =
ff_pred8x8l_vertical_8_mmxext;
@@ -197,6 +195,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
if (EXTERNAL_SSE2(cpu_flags)) {
h->pred16x16[HOR_PRED8x8 ] =
ff_pred16x16_horizontal_8_sse2;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2;
+ h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_sse2;
h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2;
h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] =
ff_pred8x8l_down_right_8_sse2;
h->pred8x8l [VERT_RIGHT_PRED ] =
ff_pred8x8l_vertical_right_8_sse2;
@@ -231,7 +230,6 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_ssse3;
if (chroma_format_idc <= 1)
h->pred8x8 [HOR_PRED8x8 ] =
ff_pred8x8_horizontal_8_ssse3;
- h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_ssse3;
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_ssse3;
h->pred8x8l [HOR_PRED ] =
ff_pred8x8l_horizontal_8_ssse3;
h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_ssse3;
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]