From 8c5fdbfea42e9ad6ba6e1df5e4ea3c583d59537a Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyue...@iscas.ac.cn>
Date: Tue, 16 Jan 2024 23:57:53 +0800
Subject: [PATCH 3/3] lavc/h264pred: R-V V pred16x16_dc_8

C908
pred16x16_dc_8_c: 2.5
pred16x16_dc_8_rvv_i32: 1.7
---
 libavcodec/riscv/h264pred_init.c |  2 ++
 libavcodec/riscv/h264pred_rvv.S  | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/libavcodec/riscv/h264pred_init.c b/libavcodec/riscv/h264pred_init.c
index 8665bc729e..e8d5b7dd8f 100644
--- a/libavcodec/riscv/h264pred_init.c
+++ b/libavcodec/riscv/h264pred_init.c
@@ -26,6 +26,7 @@
 
 void ff_pred16x16_vertical_8_rvv(uint8_t *src, ptrdiff_t stride);
 void ff_pred16x16_horizontal_8_rvv(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_8_rvv(uint8_t *src, ptrdiff_t stride);
 
 av_cold void ff_h264_pred_init_riscv(H264PredContext *h, int codec_id,
                                    const int bit_depth,
@@ -38,6 +39,7 @@ av_cold void ff_h264_pred_init_riscv(H264PredContext *h, int codec_id,
             if (flags & AV_CPU_FLAG_RVV_I32) {
                 h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_rvv;
                 h->pred16x16[HOR_PRED8x8] = ff_pred16x16_horizontal_8_rvv;
+                h->pred16x16[DC_PRED8x8] = ff_pred16x16_dc_8_rvv;
             }
         #endif
     }
diff --git a/libavcodec/riscv/h264pred_rvv.S b/libavcodec/riscv/h264pred_rvv.S
index ba1e9045e1..1492991ef4 100644
--- a/libavcodec/riscv/h264pred_rvv.S
+++ b/libavcodec/riscv/h264pred_rvv.S
@@ -48,3 +48,31 @@ func ff_pred16x16_horizontal_8_rvv, zve32x
 
         ret
 endfunc
+
+func ff_pred16x16_dc_8_rvv, zve32x               // a0 = src, a1 = stride: fill 16 rows of 16 bytes at src with the rounded mean of the 16 bytes above and the 16 bytes to the left
+        vsetivli     zero, 1, e16, m1, ta, ma    // operate on a single 16-bit element
+        vmv.v.x      v16, zero                   // v16[0] = 0, the 16-bit sum accumulator
+
+        vsetivli     zero, 16, e8, m1, ta, ma    // request 16 byte elements; NOTE(review): vl is 16 only when VLEN >= 128 - confirm the caller guarantees this
+        sub          t2, a0, a1                  // t2 = src - stride, the row directly above the block
+        vle8.v       v8, (t2)                    // load the top neighbours
+        vwredsumu.vs v16, v8, v16                // v16[0] += sum of v8, zero-extended to 16 bits
+        addi         t2, a0, -1                  // t2 = src - 1, the column directly left of the block
+        vlse8.v      v8, (t2), a1                // strided load: one left neighbour per row
+        vwredsumu.vs v16, v8, v16                // v16[0] += sum of the left neighbours
+        vsetivli     zero, 1, e16, m1, ta, ma    // back to one 16-bit element to read the accumulator
+        vmv.x.s      t1, v16                     // t1 = top sum + left sum
+        addi         t1, t1, 16                  // rounding bias: half of the 32 summed samples
+        srai         t1, t1, 5                   // dc = (sum + 16) >> 5
+        vsetivli     zero, 16, e8, m1, ta, ma    // 16 byte elements again for the broadcast
+        vmv.v.x      v0, t1                      // v0 = dc replicated into every byte
+        vsetivli     zero, 4, e8, mf4, ta, ma    // vl = 4 at SEW=8, LMUL=1/4, so vse32.v below gets EMUL = (32/8) * 1/4 = 1
+        li           t0, 16                      // t0 = number of rows left to write
+1:
+        vse32.v      v0, (a0)                    // store 4 x 32 bits = one 16-byte row of dc
+        addi         t0, t0, -1                  // one row done
+        add          a0, a0, a1                  // advance src to the next row
+        bnez         t0, 1b                      // loop until all 16 rows are written
+
+        ret
+endfunc
-- 
2.43.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to