On 6/20/2025 10:21 AM, Niklas Haas wrote:
From: Niklas Haas <g...@haasn.dev>Processes two channels in parallel, using 128-bit XMM registers. In theory, we could go up to YMM registers to process 4 channels, but this is not a gain except for relatively high channel counts (e.g. 7.1), and also complicates the sample load/store operations considerably. I decided to only add an AVX variant, since the C code is not substantially slower enough to justify a separate function just for ancient CPUs. --- libavfilter/f_ebur128.c | 15 ++-- libavfilter/f_ebur128.h | 16 ++++ libavfilter/x86/Makefile | 4 + libavfilter/x86/f_ebur128.asm | 141 +++++++++++++++++++++++++++++++ libavfilter/x86/f_ebur128_init.c | 35 ++++++++ 5 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 libavfilter/x86/f_ebur128.asm create mode 100644 libavfilter/x86/f_ebur128_init.c diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c index b9e210c05a..2d94cefce7 100644 --- a/libavfilter/f_ebur128.c +++ b/libavfilter/f_ebur128.c @@ -579,6 +579,11 @@ static av_cold int init(AVFilterContext *ctx) /* summary */ av_log(ctx, AV_LOG_VERBOSE, "EBU +%d scale\n", ebur128->meter);+ ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;+#if ARCH_X86 + ff_ebur128_init_x86(&ebur128->dsp); +#endif + return 0; }@@ -692,11 +697,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)MOVE_TO_NEXT_CACHED_ENTRY(400); MOVE_TO_NEXT_CACHED_ENTRY(3000);- ff_ebur128_filter_channels_c(dsp, &samples[idx_insample * nb_channels],- &ebur128->i400.cache[bin_id_400 * nb_channels], - &ebur128->i3000.cache[bin_id_3000 * nb_channels], - ebur128->i400.sum, ebur128->i3000.sum, - nb_channels); + dsp->filter_channels(dsp, &samples[idx_insample * nb_channels], + &ebur128->i400.cache[bin_id_400 * nb_channels], + &ebur128->i3000.cache[bin_id_3000 * nb_channels], + ebur128->i400.sum, ebur128->i3000.sum, + nb_channels);#define FIND_PEAK(global, sp, ptype) do { \int ch; \ diff --git a/libavfilter/f_ebur128.h b/libavfilter/f_ebur128.h index 
7b8e876576..1889e28bdd 100644 --- a/libavfilter/f_ebur128.h +++ b/libavfilter/f_ebur128.h @@ -22,6 +22,9 @@ #ifndef AVFILTER_F_EBUR128_H #define AVFILTER_F_EBUR128_H+#include <assert.h>+#include <stddef.h> + typedef struct EBUR128Biquad { double b0, b1, b2; double a1, a2; @@ -35,8 +38,21 @@ typedef struct EBUR128DSPContext { /* Cache of 3 samples for each channel */ double *y; /* after pre-filter */ double *z; /* after RLB-filter */ + + /* DSP functions */ + void (*filter_channels)(const struct EBUR128DSPContext *dsp, + const double *samples, + double *cache_400, double *cache_3000, + double *sum_400, double *sum_3000, + int nb_channels); } EBUR128DSPContext;+static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct layout mismatch");+static_assert(offsetof(EBUR128DSPContext, rlb) == 5 * sizeof(double), "struct layout mismatch"); +static_assert(offsetof(EBUR128DSPContext, y) == 10 * sizeof(double), "struct layout mismatch"); + +void ff_ebur128_init_x86(EBUR128DSPContext *dsp); + void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *, double *, double *, double *, double *, int);diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefileindex 0d9a28a935..e5f0c55a5e 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -7,6 +7,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o +OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128_init.o OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o @@ -52,6 +53,9 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o +ifdef ARCH_X86_64
nit: We usually do this by putting the ARCH_X86_64 check in the asm file itself, wrapping whatever needs it after the x86util.asm include, rather than making the Makefile entry conditional.
Also, a checkasm test would be nice.
OpenPGP_signature.asc
Description: OpenPGP digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".