PR #23067 opened by mkver URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23067 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23067.patch
>From dcf5d041ad50f9b23b234bd9aa4da6ff862fb296 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Sun, 10 May 2026 19:53:57 +0200 Subject: [PATCH 1/6] avfilter/vf_pp7: Constify Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavfilter/vf_pp7.c | 18 +++++++++--------- libavfilter/vf_pp7.h | 5 ++--- libavfilter/x86/vf_pp7.asm | 2 +- libavfilter/x86/vf_pp7_init.c | 2 +- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/libavfilter/vf_pp7.c b/libavfilter/vf_pp7.c index 7b653b977f..ea27e10060 100644 --- a/libavfilter/vf_pp7.c +++ b/libavfilter/vf_pp7.c @@ -96,7 +96,7 @@ static void init_thres2(PP7Context *p) } } -static inline void dctA_c(int16_t *dst, uint8_t *src, int stride) +static inline void dctA_c(int16_t *dst, const uint8_t *src, int stride) { int i; @@ -119,7 +119,7 @@ static inline void dctA_c(int16_t *dst, uint8_t *src, int stride) } } -static void dctB_c(int16_t *dst, int16_t *src) +static void dctB_c(int16_t *dst, const int16_t *src) { int i; @@ -142,7 +142,7 @@ static void dctB_c(int16_t *dst, int16_t *src) } } -static int hardthresh_c(PP7Context *p, int16_t *src, int qp) +static int hardthresh_c(const PP7Context *p, const int16_t *src, int qp) { int i; int a; @@ -158,7 +158,7 @@ static int hardthresh_c(PP7Context *p, int16_t *src, int qp) return (a + (1 << 11)) >> 12; } -static int mediumthresh_c(PP7Context *p, int16_t *src, int qp) +static int mediumthresh_c(const PP7Context *p, const int16_t *src, int qp) { int i; int a; @@ -182,7 +182,7 @@ static int mediumthresh_c(PP7Context *p, int16_t *src, int qp) return (a + (1 << 11)) >> 12; } -static int softthresh_c(PP7Context *p, int16_t *src, int qp) +static int softthresh_c(const PP7Context *p, const int16_t *src, int qp) { int i; int a; @@ -202,10 +202,10 @@ static int softthresh_c(PP7Context *p, int16_t *src, int qp) return (a + (1 << 11)) >> 12; } -static void filter(PP7Context *p, uint8_t *dst, uint8_t *src, +static void filter(PP7Context *p, uint8_t 
*dst, const uint8_t *src, int dst_stride, int src_stride, int width, int height, - uint8_t *qp_store, int qp_stride, int is_luma) + const uint8_t *qp_store, int qp_stride, int is_luma) { int x, y; const int stride = is_luma ? p->temp_stride : ((width + 16 + 15) & (~15)); @@ -231,7 +231,7 @@ static void filter(PP7Context *p, uint8_t *dst, uint8_t *src, for (y = 0; y < height; y++) { for (x = -8; x < 0; x += 4) { const int index = x + y * stride + (8 - 3) * (1 + stride) + 8; //FIXME silly offset - uint8_t *src = p_src + index; + const uint8_t *src = p_src + index; int16_t *tp = temp + 4 * x; dctA_c(tp + 4 * 8, src, stride); @@ -249,7 +249,7 @@ static void filter(PP7Context *p, uint8_t *dst, uint8_t *src, } for (; x < end; x++) { const int index = x + y * stride + (8 - 3) * (1 + stride) + 8; //FIXME silly offset - uint8_t *src = p_src + index; + const uint8_t *src = p_src + index; int16_t *tp = temp + 4 * x; int v; diff --git a/libavfilter/vf_pp7.h b/libavfilter/vf_pp7.h index b7cbb020bb..c733079291 100644 --- a/libavfilter/vf_pp7.h +++ b/libavfilter/vf_pp7.h @@ -37,9 +37,8 @@ typedef struct PP7Context { int temp_stride; uint8_t *src; - int (*requantize)(struct PP7Context *p, int16_t *src, int qp); - void (*dctB)(int16_t *dst, int16_t *src); - + int (*requantize)(const struct PP7Context *p, const int16_t *src, int qp); + void (*dctB)(int16_t *dst, const int16_t *src); } PP7Context; void ff_pp7_init_x86(PP7Context *pp7); diff --git a/libavfilter/x86/vf_pp7.asm b/libavfilter/x86/vf_pp7.asm index 7b3e5cf5e3..9dfabdcc8d 100644 --- a/libavfilter/x86/vf_pp7.asm +++ b/libavfilter/x86/vf_pp7.asm @@ -26,7 +26,7 @@ SECTION .text INIT_MMX mmx -;void ff_pp7_dctB_mmx(int16_t *dst, int16_t *src) +;void ff_pp7_dctB_mmx(int16_t *dst, const int16_t *src) cglobal pp7_dctB, 2, 2, 0, dst, src movq m0, [srcq] movq m1, [srcq+mmsize*1] diff --git a/libavfilter/x86/vf_pp7_init.c b/libavfilter/x86/vf_pp7_init.c index 165b0dd5d0..a87882359d 100644 --- a/libavfilter/x86/vf_pp7_init.c +++ 
b/libavfilter/x86/vf_pp7_init.c @@ -23,7 +23,7 @@ #include "libavutil/x86/cpu.h" #include "libavfilter/vf_pp7.h" -void ff_pp7_dctB_mmx(int16_t *dst, int16_t *src); +void ff_pp7_dctB_mmx(int16_t *dst, const int16_t *src); av_cold void ff_pp7_init_x86(PP7Context *p) { -- 2.52.0 >From d92a1bf0bfc9051a1c301c145b67f3fd5df858d7 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Sun, 10 May 2026 20:18:59 +0200 Subject: [PATCH 2/6] avfilter/vf_pp7: Add proper PP7DSPContext This is in preparation for checkasm tests for dctB. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavfilter/vf_pp7.c | 52 +++++++++++------------- libavfilter/{vf_pp7.h => vf_pp7dsp.h} | 57 ++++++++++++++++++--------- libavfilter/x86/vf_pp7_init.c | 4 +- 3 files changed, 62 insertions(+), 51 deletions(-) rename libavfilter/{vf_pp7.h => vf_pp7dsp.h} (50%) diff --git a/libavfilter/vf_pp7.c b/libavfilter/vf_pp7.c index ea27e10060..d8a5501b47 100644 --- a/libavfilter/vf_pp7.c +++ b/libavfilter/vf_pp7.c @@ -33,10 +33,12 @@ #include "libavutil/mem_internal.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" +#include "libavutil/video_enc_params.h" +#include "avfilter.h" #include "filters.h" #include "qp_table.h" -#include "vf_pp7.h" +#include "vf_pp7dsp.h" #include "video.h" enum mode { @@ -45,6 +47,23 @@ enum mode { MODE_MEDIUM }; +typedef struct PP7Context { + const AVClass *class; + int thres2[99][16]; + + int qp; + int mode; + enum AVVideoEncParamsType qscale_type; + int hsub; + int vsub; + int temp_stride; + uint8_t *src; + + int (*requantize)(const struct PP7Context *p, const int16_t *src, int qp); + + PP7DSPContext pp7dsp; +} PP7Context; + #define OFFSET(x) offsetof(PP7Context, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM static const AVOption pp7_options[] = { @@ -119,29 +138,6 @@ static inline void dctA_c(int16_t *dst, const uint8_t *src, int stride) } } -static void dctB_c(int16_t *dst, const int16_t *src) -{ - int i; - - for (i 
= 0; i < 4; i++) { - int s0 = src[0 * 4] + src[6 * 4]; - int s1 = src[1 * 4] + src[5 * 4]; - int s2 = src[2 * 4] + src[4 * 4]; - int s3 = src[3 * 4]; - int s = s3 + s3; - s3 = s - s0; - s0 = s + s0; - s = s2 + s1; - s2 = s2 - s1; - dst[0 * 4] = s0 + s; - dst[2 * 4] = s0 - s; - dst[1 * 4] = 2 * s3 + s2; - dst[3 * 4] = s3 - 2 * s2; - src++; - dst++; - } -} - static int hardthresh_c(const PP7Context *p, const int16_t *src, int qp) { int i; @@ -256,7 +252,7 @@ static void filter(PP7Context *p, uint8_t *dst, const uint8_t *src, if ((x & 3) == 0) dctA_c(tp + 4 * 8, src, stride); - p->dctB(block, tp); + p->pp7dsp.dctB(block, tp); v = p->requantize(p, block, qp); v = (v + dither[y & 7][x & 7]) >> 6; @@ -303,11 +299,7 @@ static int config_input(AVFilterLink *inlink) case 2: pp7->requantize = mediumthresh_c; break; } - pp7->dctB = dctB_c; - -#if ARCH_X86 && HAVE_X86ASM - ff_pp7_init_x86(pp7); -#endif + ff_pp7dsp_init(&pp7->pp7dsp); return 0; } diff --git a/libavfilter/vf_pp7.h b/libavfilter/vf_pp7dsp.h similarity index 50% rename from libavfilter/vf_pp7.h rename to libavfilter/vf_pp7dsp.h index c733079291..e75917bdb4 100644 --- a/libavfilter/vf_pp7.h +++ b/libavfilter/vf_pp7dsp.h @@ -19,28 +19,47 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ -#ifndef AVFILTER_PP7_H -#define AVFILTER_PP7_H +#ifndef AVFILTER_PP7DSP_H +#define AVFILTER_PP7DSP_H -#include "libavutil/video_enc_params.h" -#include "avfilter.h" +#include <stdint.h> -typedef struct PP7Context { - AVClass *class; - int thres2[99][16]; +#include "config.h" - int qp; - int mode; - enum AVVideoEncParamsType qscale_type; - int hsub; - int vsub; - int temp_stride; - uint8_t *src; - - int (*requantize)(const struct PP7Context *p, const int16_t *src, int qp); +typedef struct PP7DSPContext { void (*dctB)(int16_t *dst, const int16_t *src); -} PP7Context; +} PP7DSPContext; -void ff_pp7_init_x86(PP7Context *pp7); +void ff_pp7dsp_init_x86(PP7DSPContext *pp7dsp); -#endif /* AVFILTER_PP7_H */ +static void dctB_c(int16_t *dst, const int16_t *src) +{ + for (int i = 0; i < 4; i++) { + int s0 = src[0 * 4] + src[6 * 4]; + int s1 = src[1 * 4] + src[5 * 4]; + int s2 = src[2 * 4] + src[4 * 4]; + int s3 = src[3 * 4]; + int s = s3 + s3; + s3 = s - s0; + s0 = s + s0; + s = s2 + s1; + s2 = s2 - s1; + dst[0 * 4] = s0 + s; + dst[2 * 4] = s0 - s; + dst[1 * 4] = 2 * s3 + s2; + dst[3 * 4] = s3 - 2 * s2; + src++; + dst++; + } +} + +static inline void ff_pp7dsp_init(PP7DSPContext *pp7dsp) +{ + pp7dsp->dctB = dctB_c; + +#if ARCH_X86 && HAVE_X86ASM + ff_pp7dsp_init_x86(pp7dsp); +#endif +} + +#endif /* AVFILTER_PP7DSP_H */ diff --git a/libavfilter/x86/vf_pp7_init.c b/libavfilter/x86/vf_pp7_init.c index a87882359d..53ac907f27 100644 --- a/libavfilter/x86/vf_pp7_init.c +++ b/libavfilter/x86/vf_pp7_init.c @@ -21,11 +21,11 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/x86/cpu.h" -#include "libavfilter/vf_pp7.h" +#include "libavfilter/vf_pp7dsp.h" void ff_pp7_dctB_mmx(int16_t *dst, const int16_t *src); -av_cold void ff_pp7_init_x86(PP7Context *p) +av_cold void ff_pp7dsp_init_x86(PP7DSPContext *p) { int cpu_flags = av_get_cpu_flags(); -- 2.52.0 >From c7a827ed04442dea20fa99966efd542b388ee68a Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt 
<[email protected]> Date: Sun, 10 May 2026 20:43:49 +0200 Subject: [PATCH 3/6] tests/checkasm: Add vf_pp7 checkasm test Signed-off-by: Andreas Rheinhardt <[email protected]> --- tests/checkasm/Makefile | 1 + tests/checkasm/checkasm.c | 3 ++ tests/checkasm/checkasm.h | 1 + tests/checkasm/vf_pp7.c | 66 +++++++++++++++++++++++++++++++++++++++ tests/fate/checkasm.mak | 1 + 5 files changed, 72 insertions(+) create mode 100644 tests/checkasm/vf_pp7.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 55d2527047..53d8f3ec66 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -76,6 +76,7 @@ AVFILTEROBJS-$(CONFIG_FSPP_FILTER) += vf_fspp.o AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o AVFILTEROBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o AVFILTEROBJS-$(CONFIG_IDET_FILTER) += vf_idet.o +AVFILTEROBJS-$(CONFIG_PP7_FILTER) += vf_pp7.o AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o AVFILTEROBJS-$(CONFIG_SOBEL_FILTER) += vf_convolution.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index e863ff6eed..93298c46a7 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -342,6 +342,9 @@ static const struct { #if CONFIG_NLMEANS_FILTER { "vf_nlmeans", checkasm_check_nlmeans }, #endif + #if CONFIG_PP7_FILTER + { "vf_pp7", checkasm_check_vf_pp7 }, + #endif #if CONFIG_THRESHOLD_FILTER { "vf_threshold", checkasm_check_vf_threshold }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 72a1404163..552de20169 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -162,6 +162,7 @@ void checkasm_check_vf_eq(void); void checkasm_check_vf_fspp(void); void checkasm_check_vf_gblur(void); void checkasm_check_vf_hflip(void); +void checkasm_check_vf_pp7(void); void checkasm_check_vf_threshold(void); void checkasm_check_vf_sobel(void); void checkasm_check_vp3dsp(void); diff --git a/tests/checkasm/vf_pp7.c 
b/tests/checkasm/vf_pp7.c new file mode 100644 index 0000000000..07664f7472 --- /dev/null +++ b/tests/checkasm/vf_pp7.c @@ -0,0 +1,66 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +#include "checkasm.h" +#include "libavfilter/vf_pp7dsp.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/mem_internal.h" + +#define randomize_buffer(buf) \ + do { \ + static_assert(!(sizeof(buf) % 4), "Tail handling needed"); \ + for (size_t k = 0; k < sizeof(buf); k += 4) { \ + AV_WN32A((char*)buf + k, rnd()); \ + } \ + } while (0) + +static void check_dctB(const PP7DSPContext *const pp7dsp) +{ + declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *dst, const int16_t *src); + + if (!check_func(pp7dsp->dctB, "dctB")) + return; + + DECLARE_ALIGNED(8, int16_t, src)[7 * 4]; + DECLARE_ALIGNED(8, int16_t, dst_ref)[6 * 4]; + DECLARE_ALIGNED(8, int16_t, dst_new)[6 * 4]; + + randomize_buffer(src); + randomize_buffer(dst_ref); + memcpy(dst_new, dst_ref, sizeof(dst_new)); + call_ref(dst_ref, src); + call_new(dst_new, src); + if (memcmp(dst_new, dst_ref, sizeof(dst_new))) + fail(); + + bench_new(dst_new, src); +} + +void checkasm_check_vf_pp7(void) +{ + PP7DSPContext pp7dsp; + + ff_pp7dsp_init(&pp7dsp); + + 
check_dctB(&pp7dsp); + report("dctB"); +} diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak index b7392fa745..5fc1ec1e5f 100644 --- a/tests/fate/checkasm.mak +++ b/tests/fate/checkasm.mak @@ -83,6 +83,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \ fate-checkasm-vf_hflip \ fate-checkasm-vf_idet \ fate-checkasm-vf_nlmeans \ + fate-checkasm-vf_pp7 \ fate-checkasm-vf_threshold \ fate-checkasm-vf_sobel \ fate-checkasm-videodsp \ -- 2.52.0 >From 238738b91fa137c576f4048e5e65d907872763dc Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Sun, 10 May 2026 20:47:50 +0200 Subject: [PATCH 4/6] avfilter/vf_pp7dsp: Add restrict Makes GCC optimize the scalar codepath away. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavfilter/vf_pp7dsp.h | 4 ++-- libavfilter/x86/vf_pp7_init.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libavfilter/vf_pp7dsp.h b/libavfilter/vf_pp7dsp.h index e75917bdb4..cb405f4f58 100644 --- a/libavfilter/vf_pp7dsp.h +++ b/libavfilter/vf_pp7dsp.h @@ -27,12 +27,12 @@ #include "config.h" typedef struct PP7DSPContext { - void (*dctB)(int16_t *dst, const int16_t *src); + void (*dctB)(int16_t *restrict dst, const int16_t *restrict src); } PP7DSPContext; void ff_pp7dsp_init_x86(PP7DSPContext *pp7dsp); -static void dctB_c(int16_t *dst, const int16_t *src) +static void dctB_c(int16_t *restrict dst, const int16_t *restrict src) { for (int i = 0; i < 4; i++) { int s0 = src[0 * 4] + src[6 * 4]; diff --git a/libavfilter/x86/vf_pp7_init.c b/libavfilter/x86/vf_pp7_init.c index 53ac907f27..f294ca7764 100644 --- a/libavfilter/x86/vf_pp7_init.c +++ b/libavfilter/x86/vf_pp7_init.c @@ -23,7 +23,7 @@ #include "libavutil/x86/cpu.h" #include "libavfilter/vf_pp7dsp.h" -void ff_pp7_dctB_mmx(int16_t *dst, const int16_t *src); +void ff_pp7_dctB_mmx(int16_t *restrict dst, const int16_t *restrict src); av_cold void ff_pp7dsp_init_x86(PP7DSPContext *p) { -- 2.52.0 >From a8bca4ba6a27f6fccfecc1b0ab7d8ad8ee993dc7 Mon 
Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Sun, 10 May 2026 22:01:15 +0200 Subject: [PATCH 5/6] avfilter/x86/vf_pp7: Port ff_pp7_dctB_mmx to SSE2 Unfortunately, this is a bit slower than the MMX version because memory operands cannot be used with paddw. The situation would reverse if ff_pp7_dctB_mmx() had to issue emms. dctB_c: 3.7 ( 1.00x) dctB_mmx: 3.3 ( 1.13x) dctB_sse2: 3.6 ( 1.03x) Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavfilter/vf_pp7.c | 2 -- libavfilter/x86/vf_pp7.asm | 55 +++++++++++++++++------------------ libavfilter/x86/vf_pp7_init.c | 6 ++-- tests/checkasm/vf_pp7.c | 2 +- 4 files changed, 30 insertions(+), 35 deletions(-) diff --git a/libavfilter/vf_pp7.c b/libavfilter/vf_pp7.c index d8a5501b47..10f56c804f 100644 --- a/libavfilter/vf_pp7.c +++ b/libavfilter/vf_pp7.c @@ -27,7 +27,6 @@ * project, and ported by Arwa Arif for FFmpeg. */ -#include "libavutil/emms.h" #include "libavutil/imgutils.h" #include "libavutil/mem.h" #include "libavutil/mem_internal.h" @@ -351,7 +350,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) cw, ch, qp_table, qp_stride, 0); filter(pp7, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw, ch, qp_table, qp_stride, 0); - emms_c(); } } diff --git a/libavfilter/x86/vf_pp7.asm b/libavfilter/x86/vf_pp7.asm index 9dfabdcc8d..1a0921ed50 100644 --- a/libavfilter/x86/vf_pp7.asm +++ b/libavfilter/x86/vf_pp7.asm @@ -24,34 +24,31 @@ SECTION .text -INIT_MMX mmx +INIT_XMM sse2 +;void ff_pp7_dctB_sse2(int16_t *dst, const int16_t *src) +cglobal pp7_dctB, 2, 2, 6, dst, src + movq m0, [srcq+8*0] + movq m5, [srcq+8*6] + movq m3, [srcq+8*3] + movq m1, [srcq+8*1] + movq m4, [srcq+8*5] + movq m2, [srcq+8*2] + paddw m0, m5 + movq m5, [srcq+8*4] + paddw m3, m3 + paddw m1, m4 + paddw m2, m5 -;void ff_pp7_dctB_mmx(int16_t *dst, const int16_t *src) -cglobal pp7_dctB, 2, 2, 0, dst, src - movq m0, [srcq] - movq m1, [srcq+mmsize*1] - paddw m0, [srcq+mmsize*6] - paddw m1, 
[srcq+mmsize*5] - movq m2, [srcq+mmsize*2] - movq m3, [srcq+mmsize*3] - paddw m2, [srcq+mmsize*4] - paddw m3, m3 - movq m4, m3 - psubw m3, m0 - paddw m4, m0 - movq m0, m2 - psubw m2, m1 - paddw m0, m1 - movq m1, m4 - psubw m4, m0 - paddw m1, m0 - movq m0, m3 - psubw m3, m2 - psubw m3, m2 - paddw m2, m0 - paddw m2, m0 - movq [dstq], m1 - movq [dstq+mmsize*2], m4 - movq [dstq+mmsize*1], m2 - movq [dstq+mmsize*3], m3 + SUMSUB_BA w, 0, 3, 4 + SUMSUB_BA w, 1, 2, 5 + + SUMSUB_BA w, 1, 0, 4 + movq [dstq], m1 + paddw m4, m2, m3 + paddw m2, m2 + movq [dstq+8*2], m0 + paddw m4, m3 + psubw m3, m2 + movq [dstq+8*1], m4 + movq [dstq+8*3], m3 RET diff --git a/libavfilter/x86/vf_pp7_init.c b/libavfilter/x86/vf_pp7_init.c index f294ca7764..725326382b 100644 --- a/libavfilter/x86/vf_pp7_init.c +++ b/libavfilter/x86/vf_pp7_init.c @@ -23,12 +23,12 @@ #include "libavutil/x86/cpu.h" #include "libavfilter/vf_pp7dsp.h" -void ff_pp7_dctB_mmx(int16_t *restrict dst, const int16_t *restrict src); +void ff_pp7_dctB_sse2(int16_t *restrict dst, const int16_t *restrict src); av_cold void ff_pp7dsp_init_x86(PP7DSPContext *p) { int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_MMX(cpu_flags)) - p->dctB = ff_pp7_dctB_mmx; + if (EXTERNAL_SSE2(cpu_flags)) + p->dctB = ff_pp7_dctB_sse2; } diff --git a/tests/checkasm/vf_pp7.c b/tests/checkasm/vf_pp7.c index 07664f7472..e506eeb16c 100644 --- a/tests/checkasm/vf_pp7.c +++ b/tests/checkasm/vf_pp7.c @@ -35,7 +35,7 @@ static void check_dctB(const PP7DSPContext *const pp7dsp) { - declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *dst, const int16_t *src); + declare_func(void, int16_t *dst, const int16_t *src); if (!check_func(pp7dsp->dctB, "dctB")) return; -- 2.52.0 >From 5433b8b0d8c36dcf489b6ed347eda47ce9bf51e0 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Sun, 10 May 2026 22:41:11 +0200 Subject: [PATCH 6/6] avfilter/vf_pp7: Fix shadowing Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavfilter/vf_pp7.c | 6 ++---- 1 
file changed, 2 insertions(+), 4 deletions(-) diff --git a/libavfilter/vf_pp7.c b/libavfilter/vf_pp7.c index 10f56c804f..a2ad375227 100644 --- a/libavfilter/vf_pp7.c +++ b/libavfilter/vf_pp7.c @@ -226,10 +226,9 @@ static void filter(PP7Context *p, uint8_t *dst, const uint8_t *src, for (y = 0; y < height; y++) { for (x = -8; x < 0; x += 4) { const int index = x + y * stride + (8 - 3) * (1 + stride) + 8; //FIXME silly offset - const uint8_t *src = p_src + index; int16_t *tp = temp + 4 * x; - dctA_c(tp + 4 * 8, src, stride); + dctA_c(tp + 4 * 8, p_src + index, stride); } for (x = 0; x < width; ) { const int qps = 3 + is_luma; @@ -244,12 +243,11 @@ static void filter(PP7Context *p, uint8_t *dst, const uint8_t *src, } for (; x < end; x++) { const int index = x + y * stride + (8 - 3) * (1 + stride) + 8; //FIXME silly offset - const uint8_t *src = p_src + index; int16_t *tp = temp + 4 * x; int v; if ((x & 3) == 0) - dctA_c(tp + 4 * 8, src, stride); + dctA_c(tp + 4 * 8, p_src + index, stride); p->pp7dsp.dctB(block, tp); -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
