This patch has been updated to follow the changes made to patch 3/4.

<u...@foxmail.com> 于2024年7月23日周二 16:59写道:
> From: sunyuechi <sunyue...@iscas.ac.cn> > > C908 X60 > vp9_avg_8tap_smooth_4hv_8bpp_c : 32.0 28.0 > vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32 : 15.0 13.2 > vp9_avg_8tap_smooth_8hv_8bpp_c : 98.0 86.2 > vp9_avg_8tap_smooth_8hv_8bpp_rvv_i32 : 23.7 21.2 > vp9_avg_8tap_smooth_16hv_8bpp_c : 355.7 297.0 > vp9_avg_8tap_smooth_16hv_8bpp_rvv_i32 : 47.0 41.5 > vp9_avg_8tap_smooth_32hv_8bpp_c : 1272.7 1099.7 > vp9_avg_8tap_smooth_32hv_8bpp_rvv_i32 : 134.7 119.7 > vp9_avg_8tap_smooth_64hv_8bpp_c : 4937.0 4224.2 > vp9_avg_8tap_smooth_64hv_8bpp_rvv_i32 : 528.5 228.5 > vp9_put_8tap_smooth_4hv_8bpp_c : 30.2 26.7 > vp9_put_8tap_smooth_4hv_8bpp_rvv_i32 : 30.5 12.5 > vp9_put_8tap_smooth_8hv_8bpp_c : 91.5 81.2 > vp9_put_8tap_smooth_8hv_8bpp_rvv_i32 : 22.7 20.2 > vp9_put_8tap_smooth_16hv_8bpp_c : 313.2 277.5 > vp9_put_8tap_smooth_16hv_8bpp_rvv_i32 : 45.2 40.2 > vp9_put_8tap_smooth_32hv_8bpp_c : 1166.7 1022.2 > vp9_put_8tap_smooth_32hv_8bpp_rvv_i32 : 131.7 117.2 > vp9_put_8tap_smooth_64hv_8bpp_c : 4560.5 3961.7 > vp9_put_8tap_smooth_64hv_8bpp_rvv_i32 : 517.0 223.2 > --- > libavcodec/riscv/vp9_mc_rvv.S | 75 ++++++++++++++++++++++++++++++++++ > libavcodec/riscv/vp9dsp_init.c | 8 ++++ > 2 files changed, 83 insertions(+) > > diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S > index 6a4be7b9bd..26754ac6f8 100644 > --- a/libavcodec/riscv/vp9_mc_rvv.S > +++ b/libavcodec/riscv/vp9_mc_rvv.S > @@ -366,6 +366,77 @@ func > ff_\op\()_vp9_8tap_\name\()_\len\()\type\()_rvv\vlen\(), zve32x > endfunc > .endm > > +#if __riscv_xlen == 64 > +.macro epel_hv_once len, name, op > + sub a2, a2, a3 > + sub a2, a2, a3 > + sub a2, a2, a3 > + .irp n,0,2,4,6,8,10,12,14 > + epel_load_inc v\n, \len, put, \name, h, 1, t > + .endr > + addi a4, a4, -1 > +1: > + addi a4, a4, -1 > + epel_load v30, \len, \op, \name, v, 0, s > + vse8.v v30, (a0) > + vmv.v.v v0, v2 > + vmv.v.v v2, v4 > + vmv.v.v v4, v6 > + vmv.v.v v6, v8 > + vmv.v.v v8, v10 > + vmv.v.v v10, v12 > + vmv.v.v v12, v14 > + epel_load v14, 
\len, put, \name, h, 1, t > + add a2, a2, a3 > + add a0, a0, a1 > + bnez a4, 1b > + epel_load v30, \len, \op, \name, v, 0, s > + vse8.v v30, (a0) > +.endm > + > +.macro epel_hv op, name, len, vlen > +func ff_\op\()_vp9_8tap_\name\()_\len\()hv_rvv\vlen\(), zve32x > + addi sp, sp, -64 > + .irp n,0,1,2,3,4,5,6,7 > + sd s\n, \n\()<<3(sp) > + .endr > +.if \len == 64 && \vlen < 256 > + addi sp, sp, -48 > + .irp n,0,1,2,3,4,5 > + sd a\n, \n\()<<3(sp) > + .endr > +.endif > +.ifc \op,avg > + csrwi vxrm, 0 > +.endif > + epel_filter \name, h, t, a7 > + epel_filter \name, v, s, s7 > +.if \vlen < 256 > + vsetvlstatic8 \len, a6, 32, m2 > +.else > + vsetvlstatic8 \len, a6, 64, m2 > +.endif > + epel_hv_once \len, \name, \op > +.if \len == 64 && \vlen < 256 > + .irp n,0,1,2,3,4,5 > + ld a\n, \n\()<<3(sp) > + .endr > + addi sp, sp, 48 > + addi a0, a0, 32 > + addi a2, a2, 32 > + epel_filter \name, h, t, a7 > + epel_hv_once \len, \name, \op > +.endif > + .irp n,0,1,2,3,4,5,6,7 > + ld s\n, \n\()<<3(sp) > + .endr > + addi sp, sp, 64 > + > + ret > +endfunc > +.endm > +#endif > + > .irp len, 64, 32, 16, 8, 4 > copy_avg \len > .irp op, put, avg > @@ -374,6 +445,10 @@ endfunc > epel \len, \op, \name, \type, 128 > epel \len, \op, \name, \type, 256 > .endr > + #if __riscv_xlen == 64 > + epel_hv \op, \name, \len, 128 > + epel_hv \op, \name, \len, 256 > + #endif > .endr > .endr > .endr > diff --git a/libavcodec/riscv/vp9dsp_init.c > b/libavcodec/riscv/vp9dsp_init.c > index 3669070fca..7b090c9889 100644 > --- a/libavcodec/riscv/vp9dsp_init.c > +++ b/libavcodec/riscv/vp9dsp_init.c > @@ -119,6 +119,10 @@ static av_cold void > vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp) > if (flags & AV_CPU_FLAG_RVB_ADDR) { > init_subpel2(0, 0, 1, v, put, 128); > init_subpel2(1, 0, 1, v, avg, 128); > +# if __riscv_xlen == 64 > + init_subpel2(0, 1, 1, hv, put, 128); > + init_subpel2(1, 1, 1, hv, avg, 128); > +# endif > } > > } > @@ -129,6 +133,10 @@ static av_cold void > vp9dsp_mc_init_riscv(VP9DSPContext 
*dsp, int bpp) > if (flags & AV_CPU_FLAG_RVB_ADDR) { > init_subpel2(0, 0, 1, v, put, 256); > init_subpel2(1, 0, 1, v, avg, 256); > +# if __riscv_xlen == 64 > + init_subpel2(0, 1, 1, hv, put, 256); > + init_subpel2(1, 1, 1, hv, avg, 256); > +# endif > } > } > } > -- > 2.45.2 > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".