Re: [FFmpeg-devel] [PATCH 4/4] lavc/rv34dsp: R-V V rv34_idct_dc_add
I tested this in '[FFmpeg-devel] [PATCH] lavc/vc1dsp: R-V V inv_trans'. The logic here is the same: using vext can reduce the number of vset instructions, making it a bit faster. Rémi Denis-Courmont 于2024年2月13日周二 03:46写道: > Le keskiviikkona 31. tammikuuta 2024, 19.58.55 EET flow gg a écrit : > > Fixed the rv32 break in this reply > > It looks like widening add would avoid the sign extension. > > Although you'd need as many instructions, since V lacks signed to unsigned > clipping. > > -- > Rémi Denis-Courmont > http://www.remlab.net/ > > > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 4/4] lavc/rv34dsp: R-V V rv34_idct_dc_add
Le keskiviikkona 31. tammikuuta 2024, 19.58.55 EET flow gg a écrit : > Fixed the rv32 break in this reply It looks like widening add would avoid the sign extension. Although you'd need as many instructions, since V lacks signed to unsigned clipping. -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 4/4] lavc/rv34dsp: R-V V rv34_idct_dc_add
Fixed the rv32 break in this reply. flow gg 于2024年1月31日周三 20:01写道: > > From 0874f319e1c26aa0eeb5ed0d4e00d29aec4c5af8 Mon Sep 17 00:00:00 2001 From: sunyuechi Date: Wed, 31 Jan 2024 19:04:11 +0800 Subject: [PATCH 4/4] lavc/rv34dsp: R-V V rv34_idct_dc_add C908: rv34_idct_dc_add_c: 134.7 rv34_idct_dc_add_rvv_i32: 45.5 --- libavcodec/riscv/rv34dsp_init.c | 2 ++ libavcodec/riscv/rv34dsp_rvv.S | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/libavcodec/riscv/rv34dsp_init.c b/libavcodec/riscv/rv34dsp_init.c index 852c8ad9a8..7dcadc7e43 100644 --- a/libavcodec/riscv/rv34dsp_init.c +++ b/libavcodec/riscv/rv34dsp_init.c @@ -26,6 +26,7 @@ #include "libavcodec/rv34dsp.h" void ff_rv34_inv_transform_dc_rvv(int16_t *block); +void ff_rv34_idct_dc_add_rvv(uint8_t *dst, ptrdiff_t stride, int dc); av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c) { @@ -34,6 +35,7 @@ av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c) if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { c->rv34_inv_transform_dc = ff_rv34_inv_transform_dc_rvv; +c->rv34_idct_dc_add = ff_rv34_idct_dc_add_rvv; } #endif } diff --git a/libavcodec/riscv/rv34dsp_rvv.S b/libavcodec/riscv/rv34dsp_rvv.S index 4b7a071f7c..450226f742 100644 --- a/libavcodec/riscv/rv34dsp_rvv.S +++ b/libavcodec/riscv/rv34dsp_rvv.S @@ -36,3 +36,23 @@ func ff_rv34_inv_transform_dc_rvv, zve32x ret endfunc + +func ff_rv34_idct_dc_add_rvv, zve32x +vsetivli zero, 4, e8, mf4, ta, ma +vlse32.v v0, (a0), a1 +li t1, 169 +mul t1, t1, a2 +li a2, 255 +addi t1, t1, 512 +srai t1, t1, 10 +vsetivli zero, 4*4, e16, m2, ta, ma +vzext.vf2 v2, v0 +vadd.vx v2, v2, t1 +vmax.vx v2, v2, zero +vsetvli zero, zero, e8, m1, ta, ma +vnclipu.wi v0, v2, 0 +vsetivli zero, 4, e8, mf4, ta, ma +vsse32.v v0, (a0), a1 + +ret +endfunc -- 2.43.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 4/4] lavc/rv34dsp: R-V V rv34_idct_dc_add
From aec115a7179f23642c2b1c1a1cae3253a40c38fc Mon Sep 17 00:00:00 2001 From: sunyuechi Date: Wed, 31 Jan 2024 19:04:11 +0800 Subject: [PATCH 4/4] lavc/rv34dsp: R-V V rv34_idct_dc_add C908: rv34_idct_dc_add_c: 134.7 rv34_idct_dc_add_rvv_i32: 45.5 --- libavcodec/riscv/rv34dsp_init.c | 2 ++ libavcodec/riscv/rv34dsp_rvv.S | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/libavcodec/riscv/rv34dsp_init.c b/libavcodec/riscv/rv34dsp_init.c index 852c8ad9a8..7dcadc7e43 100644 --- a/libavcodec/riscv/rv34dsp_init.c +++ b/libavcodec/riscv/rv34dsp_init.c @@ -26,6 +26,7 @@ #include "libavcodec/rv34dsp.h" void ff_rv34_inv_transform_dc_rvv(int16_t *block); +void ff_rv34_idct_dc_add_rvv(uint8_t *dst, ptrdiff_t stride, int dc); av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c) { @@ -34,6 +35,7 @@ av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c) if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { c->rv34_inv_transform_dc = ff_rv34_inv_transform_dc_rvv; +c->rv34_idct_dc_add = ff_rv34_idct_dc_add_rvv; } #endif } diff --git a/libavcodec/riscv/rv34dsp_rvv.S b/libavcodec/riscv/rv34dsp_rvv.S index acf5b0c3e8..c73b9c4555 100644 --- a/libavcodec/riscv/rv34dsp_rvv.S +++ b/libavcodec/riscv/rv34dsp_rvv.S @@ -36,3 +36,23 @@ func ff_rv34_inv_transform_dc_rvv, zve32x ret endfunc + +func ff_rv34_idct_dc_add_rvv, zve32x +vsetivli zero, 4, e8, mf4, ta, ma +vlse32.v v0, (a0), a1 +li t1, 169 +mulw t1, t1, a2 +li a2, 255 +addiw t1, t1, 512 +sraiw t1, t1, 10 +vsetivli zero, 4*4, e16, m2, ta, ma +vzext.vf2 v2, v0 +vadd.vx v2, v2, t1 +vmax.vx v2, v2, zero +vsetvli zero, zero, e8, m1, ta, ma +vnclipu.wi v0, v2, 0 +vsetivli zero, 4, e8, mf4, ta, ma +vsse32.v v0, (a0), a1 + +ret +endfunc -- 2.43.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".