Stone Chen:
> Implements AVX2 DMVR (decoder-side motion vector refinement) SAD functions. 
> DMVR SAD is only calculated if w >= 8, h >= 8, and w * h > 128. To reduce 
> complexity, SAD is only calculated on even rows. This is calculated for all 
> video bitdepths, but the values passed to the function are always 16-bit (even 
> if the original video bitdepth is 8). The AVX2 implementation uses 
> min/max/sub.
> 
> Additionally, this changes parameters dx and dy from int to intptr_t. This 
> allows dx & dy to be used as pointer offsets without needing to use movsxd.
> 
> Benchmarks ( AMD 7940HS )
> Before:
> BQTerrace_1920x1080_60_10_420_22_RA.vvc | 106.0 |
> Chimera_8bit_1080P_1000_frames.vvc | 204.3 |
> NovosobornayaSquare_1920x1080.bin | 197.3 |
> RitualDance_1920x1080_60_10_420_37_RA.266 | 174.0 |
> 
> After:
> BQTerrace_1920x1080_60_10_420_22_RA.vvc | 109.3 |
> Chimera_8bit_1080P_1000_frames.vvc | 216.0 |
> NovosobornayaSquare_1920x1080.bin | 204.0 |
> RitualDance_1920x1080_60_10_420_37_RA.266 | 181.7 |
> ---
>  libavcodec/vvc/dsp.c             |   2 +-
>  libavcodec/vvc/dsp.h             |   2 +-
>  libavcodec/x86/vvc/Makefile      |   3 +-
>  libavcodec/x86/vvc/vvc_sad.asm   | 130 +++++++++++++++++++++++++++++++
>  libavcodec/x86/vvc/vvcdsp_init.c |   6 ++
>  5 files changed, 140 insertions(+), 3 deletions(-)
>  create mode 100644 libavcodec/x86/vvc/vvc_sad.asm
> 
> diff --git a/libavcodec/x86/vvc/vvcdsp_init.c 
> b/libavcodec/x86/vvc/vvcdsp_init.c
> index 0e68971b2c..aa6c916760 100644
> --- a/libavcodec/x86/vvc/vvcdsp_init.c
> +++ b/libavcodec/x86/vvc/vvcdsp_init.c
> @@ -311,6 +311,9 @@ ALF_FUNCS(16, 12, avx2)
>      c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_##bd##_avx2;    \
>      c->alf.classify       = ff_vvc_alf_classify_##bd##_avx2;         \
>  } while (0)
> +
> +int ff_vvc_sad_avx2(const int16_t *src0, const int16_t *src1, intptr_t dx, 
> intptr_t dy, int block_w, int block_h);
> +#define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2

You are adding an AVX2 function to an ARCH_X86_64 #if block. I expect
this to lead to linking failures if AVX2 is disabled.

>  #endif
>  
>  void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
> @@ -327,6 +330,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const 
> int bd)
>              ALF_INIT(8);
>              AVG_INIT(8, avx2);
>              MC_LINKS_AVX2(8);
> +            SAD_INIT();
>          }
>          break;
>      case 10:
> @@ -338,6 +342,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const 
> int bd)
>              AVG_INIT(10, avx2);
>              MC_LINKS_AVX2(10);
>              MC_LINKS_16BPC_AVX2(10);
> +            SAD_INIT();
>          }
>          break;
>      case 12:
> @@ -349,6 +354,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const 
> int bd)
>              AVG_INIT(12, avx2);
>              MC_LINKS_AVX2(12);
>              MC_LINKS_16BPC_AVX2(12);
> +            SAD_INIT();
>          }
>          break;
>      default:

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to