This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new f704dd77b7 libavcodec/riscv: add RVV optimized hevc_add_res
f704dd77b7 is described below
commit f704dd77b7d879707e6745424d129f6ec8f66035
Author: deng.zewen <[email protected]>
AuthorDate: Tue Apr 7 17:23:23 2026 +0800
Commit: michaelni <[email protected]>
CommitDate: Sun Jun 14 03:10:29 2026 +0000
libavcodec/riscv: add RVV optimized hevc_add_res
---
libavcodec/riscv/Makefile | 1 +
libavcodec/riscv/hevcdsp_idct_rvv.S | 149 ++++++++++++++++++++++++++++++++++++
libavcodec/riscv/hevcdsp_init.c | 22 ++++++
3 files changed, 172 insertions(+)
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 2c53334923..28d745cfe3 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -36,6 +36,7 @@ RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o
riscv/h264dsp_rvv.o \
OBJS-$(CONFIG_H264QPEL) += riscv/h264qpel_init.o
RVV-OBJS-$(CONFIG_H264QPEL) += riscv/h264qpel_rvv.o
OBJS-$(CONFIG_HEVC_DECODER) += riscv/hevcdsp_init.o
+RVV-OBJS-$(CONFIG_HEVC_DECODER) += riscv/hevcdsp_idct_rvv.o
RVV-OBJS-$(CONFIG_HEVC_DECODER) += riscv/h26x/h2656_inter_rvv.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
diff --git a/libavcodec/riscv/hevcdsp_idct_rvv.S b/libavcodec/riscv/hevcdsp_idct_rvv.S
new file mode 100644
index 0000000000..d73bcc63fe
--- /dev/null
+++ b/libavcodec/riscv/hevcdsp_idct_rvv.S
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2026 ZTE Corporation.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+/* dst[4x4] = clip_u8(dst + coeffs); a0 = dst, a1 = coeffs, a2 = stride */
+func ff_hevc_add_residual_4x4_8_rvv, zve32x
+ lpad 0
+ add t3, a0, a2 /* t3 = second row of the current row pair */
+ slli a2, a2, 1 /* a2 = 2 * stride: two rows are handled per iteration */
+ li t4, 2 /* 2 iterations x 2 rows = 4 rows */
+ vsetivli zero, 4, e8, mf4, ta, ma
+1:
+ vle8.v v2, (a0) /* 4 destination bytes, first row of the pair */
+ vle8.v v3, (t3) /* 4 destination bytes, second row of the pair */
+
+ vsetivli zero, 4, e16, mf2, ta, ma
+ vle16.v v0, (a1) /* 4 x int16 residuals for the first row */
+ vzext.vf2 v4, v2 /* widen bytes to 16 bits (zero-extended) */
+ addi a1, a1, 8 /* 4 coefficients * 2 bytes */
+ vle16.v v1, (a1) /* 4 x int16 residuals for the second row */
+ vzext.vf2 v5, v3
+ addi a1, a1, 8
+
+ vsadd.vv v0, v0, v4 /* signed saturating add at 16 bits */
+ vsadd.vv v1, v1, v5
+
+ vmax.vx v0, v0, zero /* clamp negative sums to 0 */
+ vmax.vx v1, v1, zero
+
+ vsetivli zero, 4, e8, mf4, ta, ma /* same vtype as before the loop; next loads reuse it */
+ vnclipu.wi v0, v0, 0 /* narrow 16 -> 8 bits with unsigned saturation, no shift */
+ vnclipu.wi v1, v1, 0
+
+ vse8.v v0, (a0)
+ add a0, a0, a2
+ vse8.v v1, (t3)
+ add t3, t3, a2
+
+ addi t4, t4, -1
+ bnez t4, 1b
+ ret
+endfunc
+
+func ff_hevc_add_residual_8x8_8_rvv, zve32x /* dst[8x8] = clip_u8(dst + coeffs); a0 = dst, a1 = coeffs, a2 = stride */
+ lpad 0
+ add t3, a0, a2 /* t3 = second row of the current row pair */
+ slli a2, a2, 1 /* a2 = 2 * stride: two rows are handled per iteration */
+ li t4, 4 /* 4 iterations x 2 rows = 8 rows */
+ vsetivli zero, 8, e8, mf2, ta, ma
+1:
+ vle8.v v2, (a0) /* 8 destination bytes, first row of the pair */
+ vle8.v v3, (t3) /* 8 destination bytes, second row of the pair */
+
+ vsetivli zero, 8, e16, m1, ta, ma
+ vle16.v v0, (a1) /* 8 x int16 residuals for the first row */
+ vzext.vf2 v4, v2 /* widen bytes to 16 bits (zero-extended) */
+ addi a1, a1, 16 /* 8 coefficients * 2 bytes */
+ vle16.v v1, (a1) /* 8 x int16 residuals for the second row */
+ vzext.vf2 v5, v3
+ addi a1, a1, 16
+
+ vsadd.vv v0, v0, v4 /* signed saturating add at 16 bits */
+ vsadd.vv v1, v1, v5
+
+ vmax.vx v0, v0, zero /* clamp negative sums to 0 */
+ vmax.vx v1, v1, zero
+
+ vsetivli zero, 8, e8, mf2, ta, ma /* same vtype as before the loop; next loads reuse it */
+ vnclipu.wi v0, v0, 0 /* narrow 16 -> 8 bits with unsigned saturation, no shift */
+ vnclipu.wi v1, v1, 0
+
+ vse8.v v0, (a0)
+ add a0, a0, a2
+ vse8.v v1, (t3)
+ add t3, t3, a2
+
+ addi t4, t4, -1
+ bnez t4, 1b
+ ret
+endfunc
+
+func ff_hevc_add_residual_16x16_8_rvv, zve32x /* dst[16x16] = clip_u8(dst + coeffs); a0 = dst, a1 = coeffs, a2 = stride */
+ lpad 0
+ li t4, 16 /* one row per iteration, 16 rows */
+ vsetivli zero, 16, e8, m1, ta, ma
+1:
+ vle8.v v2, (a0) /* 16 destination bytes of the current row */
+
+ vsetivli zero, 16, e16, m2, ta, ma
+ vle16.v v0, (a1) /* 16 x int16 residuals */
+ vzext.vf2 v4, v2 /* widen bytes to 16 bits (zero-extended) */
+ addi a1, a1, 32 /* 16 coefficients * 2 bytes */
+
+ vsadd.vv v0, v0, v4 /* signed saturating add at 16 bits */
+ vmax.vx v0, v0, zero /* clamp negative sums to 0 */
+ vsetivli zero, 16, e8, m1, ta, ma /* same vtype as before the loop; next load reuses it */
+ vnclipu.wi v0, v0, 0 /* narrow 16 -> 8 bits with unsigned saturation, no shift */
+
+ vse8.v v0, (a0)
+ add a0, a0, a2 /* advance to the next row */
+
+ addi t4, t4, -1
+ bnez t4, 1b
+ ret
+endfunc
+
+func ff_hevc_add_residual_32x32_8_rvv, zve32x /* dst[32x32] = clip_u8(dst + coeffs); a0 = dst, a1 = coeffs, a2 = stride */
+ lpad 0
+ li t0, 32 /* requested vector length; 32 does not fit vsetivli's 5-bit immediate, hence vsetvli with a register */
+ li t4, 32 /* one row per iteration, 32 rows */
+ vsetvli zero, t0, e8, m2, ta, ma
+1:
+ vle8.v v4, (a0) /* 32 destination bytes of the current row */
+
+ vsetvli zero, t0, e16, m4, ta, ma
+ vle16.v v0, (a1) /* 32 x int16 residuals */
+ vzext.vf2 v8, v4 /* widen bytes to 16 bits (zero-extended) */
+ addi a1, a1, 64 /* 32 coefficients * 2 bytes */
+
+ vsadd.vv v0, v0, v8 /* signed saturating add at 16 bits */
+ vmax.vx v0, v0, zero /* clamp negative sums to 0 */
+ vsetvli zero, t0, e8, m2, ta, ma /* same vtype as before the loop; next load reuses it */
+ vnclipu.wi v0, v0, 0 /* narrow 16 -> 8 bits with unsigned saturation, no shift */
+
+ vse8.v v0, (a0)
+ add a0, a0, a2 /* advance to the next row */
+
+ addi t4, t4, -1
+ bnez t4, 1b
+ ret
+endfunc
diff --git a/libavcodec/riscv/hevcdsp_init.c b/libavcodec/riscv/hevcdsp_init.c
index 70bc8ebea7..13ab2baf9c 100644
--- a/libavcodec/riscv/hevcdsp_init.c
+++ b/libavcodec/riscv/hevcdsp_init.c
@@ -27,6 +27,15 @@
#include "libavcodec/hevc/dsp.h"
#include "libavcodec/riscv/h26x/h2656dsp.h"
+void ff_hevc_add_residual_4x4_8_rvv(uint8_t *_dst, const int16_t *coeffs,
+ ptrdiff_t stride); /* 4x4, 8-bit; implemented in hevcdsp_idct_rvv.S */
+void ff_hevc_add_residual_8x8_8_rvv(uint8_t *_dst, const int16_t *coeffs,
+ ptrdiff_t stride); /* 8x8, 8-bit; implemented in hevcdsp_idct_rvv.S */
+void ff_hevc_add_residual_16x16_8_rvv(uint8_t *_dst, const int16_t *coeffs,
+ ptrdiff_t stride); /* 16x16, 8-bit; implemented in hevcdsp_idct_rvv.S */
+void ff_hevc_add_residual_32x32_8_rvv(uint8_t *_dst, const int16_t *coeffs,
+ ptrdiff_t stride); /* 32x32, 8-bit; implemented in hevcdsp_idct_rvv.S */
+
#define RVV_FNASSIGN(member, v, h, fn, ext) \
member[1][v][h] = ff_h2656_put_pixels_##8_##ext; \
member[3][v][h] = ff_h2656_put_pixels_##8_##ext; \
@@ -63,5 +72,18 @@ void ff_hevc_dsp_init_riscv(HEVCDSPContext *c, const int bit_depth)
break;
}
}
+
+ if (vlenb >= 16) {
+ switch (bit_depth){
+ case 8:
+ c->add_residual[0] =
ff_hevc_add_residual_4x4_8_rvv;
+ c->add_residual[1] =
ff_hevc_add_residual_8x8_8_rvv;
+ c->add_residual[2] =
ff_hevc_add_residual_16x16_8_rvv;
+ c->add_residual[3] =
ff_hevc_add_residual_32x32_8_rvv;
+ break;
+ default:
+ break;
+ }
+ }
#endif
}
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]