Stone Chen:
> Implements AVX2 DMVR (decoder-side motion vector refinement) SAD functions.
> DMVR SAD is only calculated if w >= 8, h >= 8, and w * h > 128. To reduce
> complexity, SAD is only calculated on even rows. This is calculated for all
> video bitdepths, but the values passed to the function are always 16bit (even
> if the original video bitdepth is 8). The AVX2 implementation uses
> min/max/sub.
>
> Additionally this changes parameters dx and dy from int to intptr_t. This
> allows dx & dy to be used as pointer offsets without needing to use movsxd.
>
> Benchmarks ( AMD 7940HS )
> Before:
> BQTerrace_1920x1080_60_10_420_22_RA.vvc | 106.0 |
> Chimera_8bit_1080P_1000_frames.vvc | 204.3 |
> NovosobornayaSquare_1920x1080.bin | 197.3 |
> RitualDance_1920x1080_60_10_420_37_RA.266 | 174.0 |
>
> After:
> BQTerrace_1920x1080_60_10_420_22_RA.vvc | 109.3 |
> Chimera_8bit_1080P_1000_frames.vvc | 216.0 |
> NovosobornayaSquare_1920x1080.bin | 204.0 |
> RitualDance_1920x1080_60_10_420_37_RA.266 | 181.7 |
> ---
> libavcodec/vvc/dsp.c | 2 +-
> libavcodec/vvc/dsp.h | 2 +-
> libavcodec/x86/vvc/Makefile | 3 +-
> libavcodec/x86/vvc/vvc_sad.asm | 130 +++++++++++++++++++++++++++++++
> libavcodec/x86/vvc/vvcdsp_init.c | 6 ++
> 5 files changed, 140 insertions(+), 3 deletions(-)
> create mode 100644 libavcodec/x86/vvc/vvc_sad.asm
>
> diff --git a/libavcodec/x86/vvc/vvcdsp_init.c
> b/libavcodec/x86/vvc/vvcdsp_init.c
> index 0e68971b2c..aa6c916760 100644
> --- a/libavcodec/x86/vvc/vvcdsp_init.c
> +++ b/libavcodec/x86/vvc/vvcdsp_init.c
> @@ -311,6 +311,9 @@ ALF_FUNCS(16, 12, avx2)
> c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_##bd##_avx2; \
> c->alf.classify = ff_vvc_alf_classify_##bd##_avx2; \
> } while (0)
> +
> +int ff_vvc_sad_avx2(const int16_t *src0, const int16_t *src1, intptr_t dx,
> intptr_t dy, int block_w, int block_h);
> +#define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2
You are adding an AVX2 function to an ARCH_X86_64 #if block. I expect this to
lead to linking failures if AVX2 is disabled.

> #endif
>
> void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
> @@ -327,6 +330,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const
> int bd)
> ALF_INIT(8);
> AVG_INIT(8, avx2);
> MC_LINKS_AVX2(8);
> + SAD_INIT();
> }
> break;
> case 10:
> @@ -338,6 +342,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const
> int bd)
> AVG_INIT(10, avx2);
> MC_LINKS_AVX2(10);
> MC_LINKS_16BPC_AVX2(10);
> + SAD_INIT();
> }
> break;
> case 12:
> @@ -349,6 +354,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const
> int bd)
> AVG_INIT(12, avx2);
> MC_LINKS_AVX2(12);
> MC_LINKS_16BPC_AVX2(12);
> + SAD_INIT();
> }
> break;
> default:

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".