From: sunyuechi
k230
banana_f3
put_hevc_pel_uni_pixels4_8_c: 126.3 ( 1.00x)90.5 (
1.00x)
put_hevc_pel_uni_pixels4_8_rvv_i32: 24.6 ( 5.14x)17.5 (
5.18x)
put_hevc_pel_uni_pixe
From: sunyuechi
k230 banana_f3
sad_8x16_c: 385.9 ( 1.00x)403.1 ( 1.00x)
sad_8x16_rvv_i32:108.1 ( 3.57x)100.8 ( 4.00x)
sad_16x8_c: 376.6 ( 1.00x)392.6 ( 1.00x)
sad_16x8_rvv_i32: 89.3 ( 4
From: sunyuechi
k230
banana_f3
put_uni_pixels_chroma_8_4x4_c: 128.3 ( 1.00x)90.5 (
1.00x)
put_uni_pixels_chroma_8_4x4_rvv_i32:17.6 ( 7.30x)17.4 (
5.18x)
put_uni_pixels_chroma
From: sunyuechi
k230
banana_f3
put_hevc_pel_uni_pixels4_8_c: 126.3 ( 1.00x)90.5 (
1.00x)
put_hevc_pel_uni_pixels4_8_rvv_i32: 24.6 ( 5.14x)17.5 (
5.18x)
put_hevc_pel_uni_pixe
From: sunyuechi
---
libavcodec/riscv/h26x/asm.S | 127 ++
libavcodec/riscv/vvc/vvc_mc_rvv.S | 109 +
2 files changed, 128 insertions(+), 108 deletions(-)
create mode 100644 libavcodec/riscv/h26x/asm.S
diff --git a/libavcodec/riscv/h26x/
From: sunyuechi
k230
banana_f3
put_chroma_pixels_8_4x4_c: 61.5 ( 1.00x)69.5 (
1.00x)
put_chroma_pixels_8_4x4_rvv_i32:33.8 ( 1.82x)38.2 (
1.82x)
put_chroma_pixels_8_8
From: sunyuechi
k230 banana_f3
dmvr_8_12x20_c: 619.3 ( 1.00x)624.1 ( 1.00x)
dmvr_8_12x20_rvv_i32: 128.6 ( 4.82x)103.4 ( 6.04x)
dmvr_8_20x12_c: 610.0 ( 1.00x)665.6 ( 1.00x)
dm
From: sunyuechi
k230 banana_f3
dmvr_8_12x20_c: 626.5 ( 1.00x)621.7 ( 1.00x)
dmvr_8_12x20_rvv_i32: 126.3 ( 4.96x)79.9 ( 7.78x)
dmvr_8_20x12_c: 608.0 ( 1.00x)652.9 ( 1.00x)
dmv
From: sunyuechi
---
libavcodec/riscv/vvc/vvc_mc_rvv.S | 46 +++
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/libavcodec/riscv/vvc/vvc_mc_rvv.S
b/libavcodec/riscv/vvc/vvc_mc_rvv.S
index 45f4750f82..18532616d9 100644
--- a/libavcodec/riscv/vvc/vvc_mc
From: sunyuechi
k230 banana_f3
dmvr_8_12x20_c: 628.5 ( 1.00x)624.1 ( 1.00x)
dmvr_8_12x20_rvv_i32: 137.5 ( 4.57x)92.9 ( 6.72x)
dmvr_8_20x12_c: 609.7 ( 1.00x)655.4 ( 1.00x)
dmv
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.0
avg_8_2x2_rvv_i32 :0.70.7
avg_8_2x4_c:2.02.2
avg_8_2x4_rvv_i3
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 12.7 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:4.74.2
vp9_avg_8tap_smooth_4v_8bpp_c : 29.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.0
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_bilin_4h_8bpp_c:5.54.7
vp9_avg_bilin_4h_8bpp_rvv_i32 :1.71.5
vp9_avg_bilin_4v_8bpp_c:5.54.7
vp9_avg_bilin_4v
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.0
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 12.7 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:4.74.2
vp9_avg_8tap_smooth_4v_8bpp_c : 29.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_bilin_4hv_8bpp_c : 10.79.5
vp9_avg_bilin_4hv_8bpp_rvv_i32 :4.03.5
vp9_avg_bilin_8hv_8bpp_c : 38.5 34.2
vp9_avg_bilin_8h
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.0
avg_8_2x2_rvv_i32 :0.70.7
avg_8_2x4_c:2.02.2
avg_8_2x4_rvv_i3
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.0
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_bilin_4hv_8bpp_c : 10.79.5
vp9_avg_bilin_4hv_8bpp_rvv_i32 :4.03.5
vp9_avg_bilin_8hv_8bpp_c : 38.5 34.2
vp9_avg_bilin_8h
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 12.7 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:4.74.2
vp9_avg_8tap_smooth_4v_8bpp_c : 29.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_bilin_4h_8bpp_c:5.54.7
vp9_avg_bilin_4h_8bpp_rvv_i32 :1.71.5
vp9_avg_bilin_4v_8bpp_c:5.54.7
vp9_avg_bilin_4v
From: sunyuechi
C908 X60
vp9_avg_bilin_4h_8bpp_c:5.54.7
vp9_avg_bilin_4h_8bpp_rvv_i32 :1.71.5
vp9_avg_bilin_4v_8bpp_c:5.54.7
vp9_avg_bilin_4v
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 12.7 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:4.74.2
vp9_avg_8tap_smooth_4v_8bpp_c : 29.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.0
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_bilin_4hv_8bpp_c : 10.79.5
vp9_avg_bilin_4hv_8bpp_rvv_i32 :4.03.5
vp9_avg_bilin_8hv_8bpp_c : 38.5 34.2
vp9_avg_bilin_8h
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.0
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 12.7 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:4.74.2
vp9_avg_8tap_smooth_4v_8bpp_c : 29.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.0
avg_8_2x2_rvv_i32 :0.70.7
avg_8_2x4_c:2.02.2
avg_8_2x4_rvv_i3
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.2
avg_8_2x2_rvv_i32 :0.70.7
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908 X60
vp8_loop_filter_simple_h_c :6.25.7
vp8_loop_filter_simple_h_rvv_i32 :3.02.5
vp8_loop_filter_simple_v_c :6.56.2
vp8_loop_filter_
From: sunyuechi
C908 X60
vp8_loop_filter8uv_h_c : 12.2 10.0
vp8_loop_filter8uv_h_rvv_i32 : 11.59.7
vp8_loop_filter8uv_v_c : 13.2 11.2
vp8_loop_filter8
From: sunyuechi
X60 new
vp8_put_bilin16_h_c: 42.542.5
vp8_put_bilin16_h_rvv_i32 :4.7 3.2
vp8_put_bilin16_hv_c : 71.571.7
vp8_put_bili
From: sunyuechi
C908 X60
vp8_loop_filter8uv_inner_h_c : 11.09.5
vp8_loop_filter8uv_inner_h_rvv_i32 : 10.58.7
vp8_loop_filter8uv_inner_v_c : 11.2 11.0
vp8_loop_filter8
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.2
avg_8_2x2_rvv_i32 :0.70.7
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.0
avg_8_2x2_rvv_i32 :1.01.0
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.0
avg_8_2x2_rvv_i32 :1.01.0
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908 X60
vp8_loop_filter8uv_inner_v_c : 12.5 11.0
vp8_loop_filter8uv_inner_v_rvv_i32 :7.76.2
vp8_loop_filter16y_inner_h_c : 11.7 10.2
vp8_loop_filter1
From: sunyuechi
X60 new
vp8_put_bilin16_h_c: 42.542.5
vp8_put_bilin16_h_rvv_i32 :4.7 3.2
vp8_put_bilin16_hv_c : 71.571.7
vp8_put_bili
From: sunyuechi
C908 X60
vp8_loop_filter_simple_h_c :7.06.0
vp8_loop_filter_simple_h_rvv_i32 :3.22.7
vp8_loop_filter_simple_v_c :7.26.5
vp8_loop_filter_
From: sunyuechi
C908 X60
vp8_loop_filter8uv_v_c : 13.7 11.7
vp8_loop_filter8uv_v_rvv_i32 :7.76.2
vp8_loop_filter16y_h_c : 12.2 11.2
vp8_loop_filter1
From: sunyuechi
C908 X60
vp9_avg_bilin_4hv_8bpp_c : 10.79.5
vp9_avg_bilin_4hv_8bpp_rvv_i32 :4.03.5
vp9_avg_bilin_8hv_8bpp_c : 38.5 34.2
vp9_avg_bilin_8h
From: sunyuechi
C908 X60
vp9_avg_bilin_4h_8bpp_c:5.54.7
vp9_avg_bilin_4h_8bpp_rvv_i32 :1.71.5
vp9_avg_bilin_4v_8bpp_c:5.54.7
vp9_avg_bilin_4v
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.0
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 12.7 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:4.74.2
vp9_avg_8tap_smooth_4v_8bpp_c : 29.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
---
libavcodec/riscv/vp8dsp_rvv.S | 10 +++---
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index a5f2e34f44..6ad349741e 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.0
avg_8_2x2_rvv_i32 :1.01.0
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908 X60
avg_8_2x2_c:1.21.0
avg_8_2x2_rvv_i32 :1.01.0
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908 X60
avg_8_2x2_c:1.01.0
avg_8_2x2_rvv_i32 :1.01.0
avg_8_2x4_c:1.72.0
avg_8_2x4_rvv_i
From: sunyuechi
Since len < 64, the registers are sufficient, so it can be
directly unrolled (a4 is even).
Another benefit of unrolling is that it reduces one load operation
vertically compared to horizontally.
old new
From: sunyuechi
C908:
vp9_avg_bilin_4hv_8bpp_c: 11.0
vp9_avg_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_avg_bilin_16hv_8bpp_c: 147.0
vp9_avg_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_avg_bilin_32hv_8bpp_c: 574.5
vp9_avg_bilin_32hv_8bpp_rvv_i64: 42.7
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.2
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
Avoid potential naming conflicts
---
libavcodec/riscv/vp9_mc_rvv.S | 4 ++--
libavcodec/riscv/vp9dsp.h | 4 ++--
libavcodec/riscv/vp9dsp_init.c | 8
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 13.0 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:5.04.2
vp9_avg_8tap_smooth_4v_8bpp_c : 13.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908:
vp9_avg_bilin_4h_8bpp_c: 5.2
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.2
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8h_8bpp_c: 20.0
vp9_avg_bilin_8h_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8v_8bpp_c: 21.0
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16h
From: sunyuechi
C908 X60
avg_8_2x2_c:1.01.0
avg_8_2x2_rvv_i32 :0.70.7
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.2
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908:
vp9_avg_bilin_4hv_8bpp_c: 11.0
vp9_avg_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_avg_bilin_16hv_8bpp_c: 147.0
vp9_avg_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_avg_bilin_32hv_8bpp_c: 574.5
vp9_avg_bilin_32hv_8bpp_rvv_i64: 42.7
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 13.0 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:5.04.2
vp9_avg_8tap_smooth_4v_8bpp_c : 13.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908:
vp9_avg_bilin_4h_8bpp_c: 5.2
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.2
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8h_8bpp_c: 20.0
vp9_avg_bilin_8h_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8v_8bpp_c: 21.0
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16h
From: sunyuechi
C908:
vp9_avg4_8bpp_c: 1.2
vp9_avg4_8bpp_rvv_i64: 1.0
vp9_avg8_8bpp_c: 3.7
vp9_avg8_8bpp_rvv_i64: 1.5
vp9_avg16_8bpp_c: 14.7
vp9_avg16_8bpp_rvv_i64: 3.5
vp9_avg32_8bpp_c: 57.7
vp9_avg32_8bpp_rvv_i64: 10.0
vp9_avg64_8bpp_c: 229.0
vp9_avg64_8bpp_rvv_i64: 31.7
---
libavcodec/riscv/M
From: sunyuechi
C908 X60
avg_8_2x2_c:1.01.0
avg_8_2x2_rvv_i32 :0.70.7
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908 X60
avg_8_2x2_c:1.01.0
avg_8_2x2_rvv_i32 :0.70.7
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i
From: sunyuechi
C908:
vp8_put_epel4_h4v4_c: 20.0
vp8_put_epel4_h4v4_rvv_i32: 11.0
vp8_put_epel4_h4v6_c: 25.2
vp8_put_epel4_h4v6_rvv_i32: 13.5
vp8_put_epel4_h6v4_c: 22.2
vp8_put_epel4_h6v4_rvv_i32: 14.5
vp8_put_epel4_h6v6_c: 29.0
vp8_put_epel4_h6v6_rvv_i32: 15.7
vp8_put_epel8_h4v4_c: 73.0
vp8_put_
From: sunyuechi
C908:
vp9_avg_bilin_4hv_8bpp_c: 11.0
vp9_avg_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_avg_bilin_16hv_8bpp_c: 147.0
vp9_avg_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_avg_bilin_32hv_8bpp_c: 574.5
vp9_avg_bilin_32hv_8bpp_rvv_i64: 42.7
From: sunyuechi
C908:
vp9_avg4_8bpp_c: 1.2
vp9_avg4_8bpp_rvv_i64: 1.0
vp9_avg8_8bpp_c: 3.7
vp9_avg8_8bpp_rvv_i64: 1.5
vp9_avg16_8bpp_c: 14.7
vp9_avg16_8bpp_rvv_i64: 3.5
vp9_avg32_8bpp_c: 57.7
vp9_avg32_8bpp_rvv_i64: 10.0
vp9_avg64_8bpp_c: 229.0
vp9_avg64_8bpp_rvv_i64: 31.7
---
libavcodec/riscv/M
From: sunyuechi
C908:
vp9_avg_bilin_4h_8bpp_c: 5.2
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.2
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8h_8bpp_c: 20.0
vp9_avg_bilin_8h_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8v_8bpp_c: 21.0
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16h
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.2
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 13.0 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:5.04.2
vp9_avg_8tap_smooth_4v_8bpp_c : 13.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908:
vp9_avg4_8bpp_c: 1.2
vp9_avg4_8bpp_rvv_i64: 1.0
vp9_avg8_8bpp_c: 3.7
vp9_avg8_8bpp_rvv_i64: 1.5
vp9_avg16_8bpp_c: 14.7
vp9_avg16_8bpp_rvv_i64: 3.5
vp9_avg32_8bpp_c: 57.7
vp9_avg32_8bpp_rvv_i64: 10.0
vp9_avg64_8bpp_c: 229.0
vp9_avg64_8bpp_rvv_i64: 31.7
---
libavcodec/riscv/M
From: sunyuechi
C908:
vp9_tm_4x4_8bpp_c: 116.5
vp9_tm_4x4_8bpp_rvv_i32: 43.5
vp9_tm_8x8_8bpp_c: 416.2
vp9_tm_8x8_8bpp_rvv_i32: 86.0
vp9_tm_16x16_8bpp_c: 1665.5
vp9_tm_16x16_8bpp_rvv_i32: 187.2
vp9_tm_32x32_8bpp_c: 6974.2
vp9_tm_32x32_8bpp_rvv_i32: 625.7
---
libavcodec/riscv/vp9_intra_rvv.S | 118
From: sunyuechi
C908:
vp9_tm_4x4_8bpp_c: 116.5
vp9_tm_4x4_8bpp_rvv_i32: 43.5
vp9_tm_8x8_8bpp_c: 416.2
vp9_tm_8x8_8bpp_rvv_i32: 86.0
vp9_tm_16x16_8bpp_c: 1665.5
vp9_tm_16x16_8bpp_rvv_i32: 187.2
vp9_tm_32x32_8bpp_c: 6974.2
vp9_tm_32x32_8bpp_rvv_i32: 625.7
---
libavcodec/riscv/vp9_intra_rvv.S | 123
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 13.0 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:5.04.2
vp9_avg_8tap_smooth_4v_8bpp_c : 13.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908:
vp9_avg_bilin_4hv_8bpp_c: 11.0
vp9_avg_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_avg_bilin_16hv_8bpp_c: 147.0
vp9_avg_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_avg_bilin_32hv_8bpp_c: 574.5
vp9_avg_bilin_32hv_8bpp_rvv_i64: 42.7
From: sunyuechi
C908:
vp9_avg_bilin_4h_8bpp_c: 5.2
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.2
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8h_8bpp_c: 20.0
vp9_avg_bilin_8h_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8v_8bpp_c: 21.0
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16h
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.2
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908:
vp9_avg4_8bpp_c: 1.2
vp9_avg4_8bpp_rvv_i64: 1.0
vp9_avg8_8bpp_c: 3.7
vp9_avg8_8bpp_rvv_i64: 1.5
vp9_avg16_8bpp_c: 14.7
vp9_avg16_8bpp_rvv_i64: 3.5
vp9_avg32_8bpp_c: 57.7
vp9_avg32_8bpp_rvv_i64: 10.0
vp9_avg64_8bpp_c: 229.0
vp9_avg64_8bpp_rvv_i64: 31.7
---
libavcodec/riscv/M
From: sunyuechi
C908:
vp9_tm_4x4_8bpp_c: 116.5
vp9_tm_4x4_8bpp_rvv_i32: 43.5
vp9_tm_8x8_8bpp_c: 416.2
vp9_tm_8x8_8bpp_rvv_i32: 86.0
vp9_tm_16x16_8bpp_c: 1665.5
vp9_tm_16x16_8bpp_rvv_i32: 187.2
vp9_tm_32x32_8bpp_c: 6974.2
vp9_tm_32x32_8bpp_rvv_i32: 625.7
---
libavcodec/riscv/vp9_intra_rvv.S | 141
From: sunyuechi
C908:
vp9_vert_8x8_8bpp_c: 22.0
vp9_vert_8x8_8bpp_rvi: 15.7
vp9_vert_16x16_8bpp_c: 71.2
vp9_vert_16x16_8bpp_rvi: 39.0
vp9_vert_32x32_8bpp_c: 300.2
vp9_vert_32x32_8bpp_rvi: 135.2
---
libavcodec/riscv/Makefile| 1 +
libavcodec/riscv/vp9_intra_rvi.S | 71 +++
From: sunyuechi
C908:
vp9_hor_8x8_8bpp_c: 74.7
vp9_hor_8x8_8bpp_rvv_i32: 35.7
vp9_hor_16x16_8bpp_c: 175.5
vp9_hor_16x16_8bpp_rvv_i32: 80.2
vp9_hor_32x32_8bpp_c: 510.2
vp9_hor_32x32_8bpp_rvv_i32: 264.0
---
libavcodec/riscv/vp9_intra_rvv.S | 56
libavcodec/riscv/vp
From: sunyuechi
C908:
vp9_put4_8bpp_c: 0.7
vp9_put4_8bpp_rvi: 0.5
vp9_put8_8bpp_c: 2.5
vp9_put8_8bpp_rvi: 0.5
vp9_put16_8bpp_c: 16.7
vp9_put16_8bpp_rvi: 1.5
vp9_put32_8bpp_c: 37.2
vp9_put32_8bpp_rvi: 5.7
vp9_put64_8bpp_c: 107.5
vp9_put64_8bpp_rvi: 21.7
---
libavcodec/riscv/Makefile | 3 +-
From: sunyuechi
C908 X60
vc1dsp.avg_vc1_mspel_pixels_tab[0][0]_c: 14.7 13.2
vc1dsp.avg_vc1_mspel_pixels_tab[0][0]_rvv_i32 : 2.5 2.2
vc1dsp.avg_vc1_mspel_pixels_tab[1][0]_c: 3.7 3.5
vc1dsp.avg_vc1_mspel_pixel
From: sunyuechi
C908 X60
vc1dsp.avg_vc1_mspel_pixels_tab[0][0]_c: 14.7 13.2
vc1dsp.avg_vc1_mspel_pixels_tab[0][0]_rvv_i32 : 2.5 2.2
vc1dsp.avg_vc1_mspel_pixels_tab[1][0]_c: 3.7 3.5
vc1dsp.avg_vc1_mspel_pixel
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4h_8bpp_c : 13.0 11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:5.04.2
vp9_avg_8tap_smooth_4v_8bpp_c : 13.7 12.5
vp9_avg_8tap_smo
From: sunyuechi
C908 X60
vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.2
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2
vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2
vp9_avg_8tap_smo
From: sunyuechi
C908:
vp9_avg_bilin_4hv_8bpp_c: 11.0
vp9_avg_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_avg_bilin_16hv_8bpp_c: 147.0
vp9_avg_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_avg_bilin_32hv_8bpp_c: 574.5
vp9_avg_bilin_32hv_8bpp_rvv_i64: 42.7
From: sunyuechi
C908:
vp9_vert_8x8_8bpp_c: 22.0
vp9_vert_8x8_8bpp_rvi: 15.7
vp9_vert_16x16_8bpp_c: 71.2
vp9_vert_16x16_8bpp_rvi: 39.0
vp9_vert_32x32_8bpp_c: 300.2
vp9_vert_32x32_8bpp_rvi: 135.2
---
libavcodec/riscv/Makefile| 1 +
libavcodec/riscv/vp9_intra_rvi.S | 71 +++
From: sunyuechi
C908:
vp9_avg_bilin_4h_8bpp_c: 5.2
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.2
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8h_8bpp_c: 20.0
vp9_avg_bilin_8h_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8v_8bpp_c: 21.0
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16h
From: sunyuechi
C908:
vp9_hor_8x8_8bpp_c: 74.7
vp9_hor_8x8_8bpp_rvv_i32: 35.7
vp9_hor_16x16_8bpp_c: 175.5
vp9_hor_16x16_8bpp_rvv_i32: 80.2
vp9_hor_32x32_8bpp_c: 510.2
vp9_hor_32x32_8bpp_rvv_i32: 264.0
---
libavcodec/riscv/vp9_intra_rvv.S | 56
libavcodec/riscv/vp
From: sunyuechi
C908:
vp9_put4_8bpp_c: 0.7
vp9_put4_8bpp_rvi: 0.5
vp9_put8_8bpp_c: 2.5
vp9_put8_8bpp_rvi: 0.5
vp9_put16_8bpp_c: 16.7
vp9_put16_8bpp_rvi: 1.5
vp9_put32_8bpp_c: 37.2
vp9_put32_8bpp_rvi: 5.7
vp9_put64_8bpp_c: 107.5
vp9_put64_8bpp_rvi: 21.7
---
libavcodec/riscv/Makefile | 3 +-
From: sunyuechi
C908:
vp9_tm_4x4_8bpp_c: 116.5
vp9_tm_4x4_8bpp_rvv_i32: 43.5
vp9_tm_8x8_8bpp_c: 416.2
vp9_tm_8x8_8bpp_rvv_i32: 86.0
vp9_tm_16x16_8bpp_c: 1665.5
vp9_tm_16x16_8bpp_rvv_i32: 187.2
vp9_tm_32x32_8bpp_c: 6974.2
vp9_tm_32x32_8bpp_rvv_i32: 625.7
---
libavcodec/riscv/vp9_intra_rvv.S | 141
From: sunyuechi
C908:
vp9_avg4_8bpp_c: 1.2
vp9_avg4_8bpp_rvv_i64: 1.0
vp9_avg8_8bpp_c: 3.7
vp9_avg8_8bpp_rvv_i64: 1.5
vp9_avg16_8bpp_c: 14.7
vp9_avg16_8bpp_rvv_i64: 3.5
vp9_avg32_8bpp_c: 57.7
vp9_avg32_8bpp_rvv_i64: 10.0
vp9_avg64_8bpp_c: 229.0
vp9_avg64_8bpp_rvv_i64: 31.7
---
libavcodec/riscv/M
From: sunyuechi
C908:
vp8_put_epel4_h4v4_c: 20.0
vp8_put_epel4_h4v4_rvv_i32: 11.0
vp8_put_epel4_h4v6_c: 25.2
vp8_put_epel4_h4v6_rvv_i32: 13.5
vp8_put_epel4_h6v4_c: 22.2
vp8_put_epel4_h6v4_rvv_i32: 14.5
vp8_put_epel4_h6v6_c: 29.0
vp8_put_epel4_h6v6_rvv_i32: 15.7
vp8_put_epel8_h4v4_c: 73.0
vp8_put_
From: sunyuechi
C908 X60
vc1dsp.avg_vc1_mspel_pixels_tab[0][0]_c: 14.7 13.2
vc1dsp.avg_vc1_mspel_pixels_tab[0][0]_rvv_i32 : 2.5 2.2
vc1dsp.avg_vc1_mspel_pixels_tab[1][0]_c: 3.7 3.5
vc1dsp.avg_vc1_mspel_pixel
From: sunyuechi
C908:
vp8_loop_filter8uv_v_c: 745.5
vp8_loop_filter8uv_v_rvv_i32: 467.2
vp8_loop_filter16y_h_c: 674.2
vp8_loop_filter16y_h_rvv_i32: 553.0
vp8_loop_filter16y_v_c: 732.7
vp8_loop_filter16y_v_rvv_i32: 324.5
---
libavcodec/riscv/vp8dsp_init.c | 4 +++
libavcodec/riscv/vp8dsp_rvv.S
From: sunyuechi
C908:
vp8_loop_filter8uv_inner_v_c: 738.2
vp8_loop_filter8uv_inner_v_rvv_i32: 455.2
vp8_loop_filter16y_inner_h_c: 685.0
vp8_loop_filter16y_inner_h_rvv_i32: 497.0
vp8_loop_filter16y_inner_v_c: 743.7
vp8_loop_filter16y_inner_v_rvv_i32: 295.7
---
libavcodec/riscv/vp8dsp_init.c | 4
From: sunyuechi
C908:
vp8_loop_filter_simple_h_c: 416.0
vp8_loop_filter_simple_h_rvv_i32: 187.5
vp8_loop_filter_simple_v_c: 429.7
vp8_loop_filter_simple_v_rvv_i32: 104.0
---
libavcodec/riscv/vp8dsp_init.c | 5 ++
libavcodec/riscv/vp8dsp_rvv.S | 85 ++
2 files ch
From: sunyuechi
C908:
vp8_put_bilin4_hv_c: 561.0
vp8_put_bilin4_hv_rvv_i32: 232.7
vp8_put_bilin8_hv_c: 2162.7
vp8_put_bilin8_hv_rvv_i32: 506.7
vp8_put_bilin16_hv_c: 4769.7
vp8_put_bilin16_hv_rvv_i32: 556.7
---
libavcodec/riscv/vp8dsp_init.c | 13 +
libavcodec/riscv/vp8dsp_rvv.S | 26
From: sunyuechi
C908:
vp8_put_epel4_h4v4_c: 20.0
vp8_put_epel4_h4v4_rvv_i32: 11.0
vp8_put_epel4_h4v6_c: 25.2
vp8_put_epel4_h4v6_rvv_i32: 13.5
vp8_put_epel4_h6v4_c: 22.2
vp8_put_epel4_h6v4_rvv_i32: 14.5
vp8_put_epel4_h6v6_c: 29.0
vp8_put_epel4_h6v6_rvv_i32: 15.7
vp8_put_epel8_h4v4_c: 73.0
vp8_put_
From: sunyuechi
C908:
vp8_put_epel4_v4_c: 11.0
vp8_put_epel4_v4_rvv_i32: 5.0
vp8_put_epel4_v6_c: 16.5
vp8_put_epel4_v6_rvv_i32: 6.2
vp8_put_epel8_v4_c: 43.7
vp8_put_epel8_v4_rvv_i32: 11.2
vp8_put_epel8_v6_c: 68.7
vp8_put_epel8_v6_rvv_i32: 13.2
vp8_put_epel16_v4_c: 92.5
vp8_put_epel16_v4_rvv_i32:
1 - 100 of 155 matches
Mail list logo