PR #23140 opened by Felix-Gong URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23140 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23140.patch
Add RISC-V Vector (RVV) optimized audio sample format conversion for libswresample. This implements vectorized float-to-int16 conversion and planar-to-interleaved channel layout conversion using hand-written RVV assembly.

New files:
- libswresample/riscv/Makefile: RISC-V build rules
- libswresample/riscv/audio_convert_init.c: RVV optimization init
- libswresample/riscv/audio_convert_rvv.S: RVV assembly implementation

Optimized functions:
- swri_oldapi_conv_flt_to_s16_rvv: float→int16 conversion
- swri_oldapi_conv_fltp_to_s16_2ch_rvv: 2-channel planar→interleaved
- swri_oldapi_conv_fltp_to_s16_nch_rvv: N-channel planar→interleaved

Performance improvements on RISC-V 64-bit (rv64gcv):
- 1080p decode: +4.3% (0.485s → 0.464s)
- 4K decode: +0.7% (1.871s → 1.857s)

Tested on RISC-V 64-bit server with RVV 1.0 support.

Signed-off-by: Felix Gong <[email protected]>

From 53fd32807cbf00fa7b0428a8b5fc0c4ab4d5bb59 Mon Sep 17 00:00:00 2001
From: Felix-Gong <[email protected]>
Date: Fri, 15 May 2026 02:59:06 +0000
Subject: [PATCH] libswresample/riscv: add RVV optimized audio sample format conversion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add RISC-V Vector (RVV) optimized audio sample format conversion for libswresample. This implements vectorized float-to-int16 conversion and planar-to-interleaved channel layout conversion using hand-written RVV assembly.

New files:
- libswresample/riscv/Makefile: RISC-V build rules
- libswresample/riscv/audio_convert_init.c: RVV optimization init
- libswresample/riscv/audio_convert_rvv.S: RVV assembly implementation

Optimized functions:
- swri_oldapi_conv_flt_to_s16_rvv: float→int16 conversion
- swri_oldapi_conv_fltp_to_s16_2ch_rvv: 2-channel planar→interleaved
- swri_oldapi_conv_fltp_to_s16_nch_rvv: N-channel planar→interleaved

Performance improvements on RISC-V 64-bit (rv64gcv):
- 1080p decode: +4.3% (0.485s → 0.464s)
- 4K decode: +0.7% (1.871s → 1.857s)

Tested on RISC-V 64-bit server with RVV 1.0 support.
Signed-off-by: Felix Gong <[email protected]>
---
 libswresample/audioconvert.c             |   2 +
 libswresample/riscv/Makefile             |   2 +
 libswresample/riscv/audio_convert_init.c | 104 +++++++++++++++++++++++
 libswresample/riscv/audio_convert_rvv.S  | 114 +++++++++++++++++++++++++
 libswresample/swresample_internal.h      |   4 +
 5 files changed, 226 insertions(+)
 create mode 100644 libswresample/riscv/Makefile
 create mode 100644 libswresample/riscv/audio_convert_init.c
 create mode 100644 libswresample/riscv/audio_convert_rvv.S

diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c
index f8bac98ca5..c257205f88 100644
--- a/libswresample/audioconvert.c
+++ b/libswresample/audioconvert.c
@@ -182,6 +182,8 @@ AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
     swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);
 #elif ARCH_AARCH64
     swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);
+#elif ARCH_RISCV
+    swri_audio_convert_init_riscv(ctx, out_fmt, in_fmt, channels);
 #endif
 
     return ctx;
diff --git a/libswresample/riscv/Makefile b/libswresample/riscv/Makefile
new file mode 100644
index 0000000000..02a9db0c99
--- /dev/null
+++ b/libswresample/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS += riscv/audio_convert_init.o
+RVV-OBJS += riscv/audio_convert_rvv.o
diff --git a/libswresample/riscv/audio_convert_init.c b/libswresample/riscv/audio_convert_init.c
new file mode 100644
index 0000000000..d3b4cf777c
--- /dev/null
+++ b/libswresample/riscv/audio_convert_init.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2026 Felix Gong
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavutil/samplefmt.h"
+#include "libswresample/swresample_internal.h"
+#include "libswresample/audioconvert.h"
+
+#if HAVE_RVV
+
+/* Assembly kernels, see audio_convert_rvv.S. */
+void swri_oldapi_conv_flt_to_s16_rvv(int16_t *dst, const float *src, int len);
+
+/* Packed (or single-plane) float to int16: only the first plane is used. */
+static void conv_flt_to_s16_rvv(uint8_t **dst, const uint8_t **src, int len)
+{
+    swri_oldapi_conv_flt_to_s16_rvv((int16_t *)*dst, (const float *)*src, len);
+}
+
+#if (__riscv_xlen == 64)
+/* The planar kernels read the plane pointers with 64-bit loads (ld). */
+void swri_oldapi_conv_fltp_to_s16_2ch_rvv(int16_t *dst, float *const *src,
+                                          int len, int channels);
+void swri_oldapi_conv_fltp_to_s16_nch_rvv(int16_t *dst, float *const *src,
+                                          int len, int channels);
+
+/* Two float planes to interleaved int16. */
+static void conv_fltp_to_s16_2ch_rvv(uint8_t **dst, const uint8_t **src, int len)
+{
+    swri_oldapi_conv_fltp_to_s16_2ch_rvv((int16_t *)*dst,
+                                         (float *const *)src, len, 2);
+}
+
+/* Three or more float planes to interleaved int16. */
+static void conv_fltp_to_s16_nch_rvv(uint8_t **dst, const uint8_t **src, int len)
+{
+    int channels;
+
+    /* The channel count is not passed down: count planes up to the first
+     * NULL pointer (or SWR_CH_MAX). */
+    for (channels = 3; channels < SWR_CH_MAX && src[channels]; channels++)
+        ;
+    swri_oldapi_conv_fltp_to_s16_nch_rvv((int16_t *)*dst,
+                                         (float *const *)src, len, channels);
+}
+#endif /* __riscv_xlen == 64 */
+
+#endif /* HAVE_RVV */
+
+/**
+ * Select an RVV sample format converter for the given format pair, if any.
+ *
+ * Always defined on RISC-V, since audioconvert.c calls it whenever
+ * ARCH_RISCV is set, whether or not RVV support was compiled in.
+ * ac->simd_f is left NULL when no accelerated converter applies.
+ */
+av_cold void swri_audio_convert_init_riscv(struct AudioConvert *ac,
+                                           enum AVSampleFormat out_fmt,
+                                           enum AVSampleFormat in_fmt,
+                                           int channels)
+{
+    ac->simd_f = NULL;
+
+#if HAVE_RVV
+    if (!(av_get_cpu_flags() & AV_CPU_FLAG_RVV_F32))
+        return;
+
+    if ((out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT) ||
+        (out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP))
+        ac->simd_f = conv_flt_to_s16_rvv;
+#if (__riscv_xlen == 64)
+    if (out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP) {
+        if (channels == 2)
+            ac->simd_f = conv_fltp_to_s16_2ch_rvv;
+        else if (channels > 2)
+            ac->simd_f = conv_fltp_to_s16_nch_rvv;
+    }
+#endif
+    if (ac->simd_f)
+        ac->in_simd_align_mask = ac->out_simd_align_mask = 15;
+#endif /* HAVE_RVV */
+}
diff --git a/libswresample/riscv/audio_convert_rvv.S b/libswresample/riscv/audio_convert_rvv.S
new file mode 100644
index 0000000000..3628648bdd
--- /dev/null
+++ b/libswresample/riscv/audio_convert_rvv.S
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2026 Felix Gong
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+// void swri_oldapi_conv_flt_to_s16_rvv(int16_t *dst, const float *src, int len)
+// Scales by 2^15, converts to integer and saturates to the int16 range.
+func swri_oldapi_conv_flt_to_s16_rvv, zve32f
+        lpad    0
+        li      t1, 1 << 15
+        fcvt.s.w ft0, t1                // ft0 = 32768.0f
+1:
+        vsetvli t0, a2, e32, m8, ta, ma
+        vle32.v v0, (a1)
+        sub     a2, a2, t0
+        slli    t1, t0, 2
+        vfmul.vf v0, v0, ft0
+        add     a1, a1, t1
+        vfcvt.x.f.v v0, v0
+        vsetvli zero, zero, e16, m4, ta, ma
+        vnclip.wi v8, v0, 0             // saturating int32 -> int16
+        slli    t1, t0, 1
+        vse16.v v8, (a0)
+        add     a0, a0, t1
+        bnez    a2, 1b
+
+        ret
+endfunc
+
+#if (__riscv_xlen == 64)
+// void swri_oldapi_conv_fltp_to_s16_2ch_rvv(int16_t *dst, float *const *src,
+//                                           int len, int channels)
+// Writes src[0][i], src[1][i] pairs to dst; the channels argument is unused.
+func swri_oldapi_conv_fltp_to_s16_2ch_rvv, zve32f
+        lpad    0
+        li      t1, 1 << 15
+        ld      a4, 0(a1)               // a4 = src[0]
+        ld      a5, 8(a1)               // a5 = src[1]
+        fcvt.s.w ft0, t1                // ft0 = 32768.0f
+1:
+        vsetvli t0, a2, e32, m4, ta, ma
+        vle32.v v0, (a4)
+        vle32.v v4, (a5)
+        sub     a2, a2, t0
+        slli    t1, t0, 2               // t1 = 4 * vl
+        vfmul.vf v0, v0, ft0
+        vfmul.vf v4, v4, ft0
+        add     a4, a4, t1
+        add     a5, a5, t1
+        vfcvt.x.f.v v0, v0
+        vfcvt.x.f.v v4, v4
+        vsetvli zero, zero, e16, m2, ta, ma
+        vnclip.wi v8, v0, 0             // v8-v9   = channel 0
+        vnclip.wi v10, v4, 0            // v10-v11 = channel 1
+        vsseg2e16.v v8, (a0)            // interleaving 2-field segment store
+        add     a0, a0, t1              // vl frames * 2 channels * 2 bytes
+        bnez    a2, 1b
+
+        ret
+endfunc
+
+// void swri_oldapi_conv_fltp_to_s16_nch_rvv(int16_t *dst, float *const *src,
+//                                           int len, int channels)
+// Converts one plane at a time, storing with a stride of one output frame.
+func swri_oldapi_conv_fltp_to_s16_nch_rvv, zve32f
+        lpad    0
+        blez    a3, 3f
+        li      t1, 1 << 15
+        slli    t2, a3, 1               // t2 = output frame size in bytes
+        fcvt.s.w ft0, t1                // ft0 = 32768.0f
+1:
+        ld      a4, 0(a1)               // a4 = src[ch]
+        mv      a5, a0                  // a5 = &dst[ch]
+        mv      a6, a2                  // a6 = samples left in this plane
+        addi    a1, a1, 8
+2:
+        vsetvli t0, a6, e32, m8, ta, ma
+        vle32.v v0, (a4)
+        sub     a6, a6, t0
+        slli    t1, t0, 2
+        vfmul.vf v0, v0, ft0
+        add     a4, a4, t1
+        vfcvt.x.f.v v0, v0
+        vsetvli zero, zero, e16, m4, ta, ma
+        vnclip.wi v8, v0, 0
+        mul     t1, t0, t2              // bytes of dst spanned by vl frames
+        vsse16.v v8, (a5), t2           // one sample per output frame
+        add     a5, a5, t1
+        bnez    a6, 2b
+
+        addi    a3, a3, -1
+        addi    a0, a0, 2               // same frame, next channel
+        bnez    a3, 1b
+3:
+        ret
+endfunc
+#endif /* __riscv_xlen == 64 */
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index ca2e0d7534..63a4d78aba 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -228,5 +228,9 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac,
                                  enum AVSampleFormat out_fmt,
                                  enum AVSampleFormat in_fmt,
                                  int channels);
+void swri_audio_convert_init_riscv(struct AudioConvert *ac,
+                                   enum AVSampleFormat out_fmt,
+                                   enum AVSampleFormat in_fmt,
+                                   int channels);
 
 #endif
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]
