Adds checkasm for DMVR SAD AVX2 implementation.

Benchmarks (AMD 7940HS):
vvc_sad_8_16bpc_c: 112.5
vvc_sad_8_16bpc_avx2: 2.5
vvc_sad_16_16bpc_c: 232.5
vvc_sad_16_16bpc_avx2: 22.5
vvc_sad_32_16bpc_c: 912.5
vvc_sad_32_16bpc_avx2: 82.5
vvc_sad_64_16bpc_c: 3582.5
vvc_sad_64_16bpc_avx2: 392.5
vvc_sad_128_16bpc_c: 16702.5
vvc_sad_128_16bpc_avx2: 1912.5
Before: BQTerrace_1920x1080_60_10_420_22_RA.vvc | 80.7 | Chimera_8bit_1080P_1000_frames.vvc | 158.0 | NovosobornayaSquare_1920x1080.bin | 159.7 | RitualDance_1920x1080_60_10_420_37_RA.266 | 146.3 | After: BQTerrace_1920x1080_60_10_420_22_RA.vvc | 82.7 | Chimera_8bit_1080P_1000_frames.vvc | 167.0 | NovosobornayaSquare_1920x1080.bin | 166.3 | RitualDance_1920x1080_60_10_420_37_RA.266 | 154.0 | --- tests/checkasm/vvc_mc.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c index 97f57cb401..77dd32fbbb 100644 --- a/tests/checkasm/vvc_mc.c +++ b/tests/checkasm/vvc_mc.c @@ -322,8 +322,46 @@ static void check_avg(void) report("avg"); } +static void check_vvc_sad(void) +{ + const int bit_depth = 10; + VVCDSPContext c; + LOCAL_ALIGNED_32(uint16_t, src0, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]); + LOCAL_ALIGNED_32(uint16_t, src1, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]); + declare_func(int, const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); + + ff_vvc_dsp_init(&c, bit_depth); + memset(src0, 0, MAX_CTU_SIZE * MAX_CTU_SIZE * 2); + memset(src1, 0, MAX_CTU_SIZE * MAX_CTU_SIZE * 2); + + randomize_pixels(src0, src1, MAX_CTU_SIZE * MAX_CTU_SIZE * 2); + for (int h = 8; h <= MAX_CTU_SIZE; h *= 2) { + for (int w = 8; w <= MAX_CTU_SIZE; w *= 2) { + for(int offy = 0; offy <= 4; offy++) { + for(int offx = 0; offx <= 4; offx++) { + if(check_func(c.inter.sad[av_log2(w)-2], "vvc_sad_%dx%d", w, h)) { + int result0; + int result1; + + result0 = call_ref(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h); + result1 = call_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h); + + if (result1 != result0) + fail(); + if(w == h && offx == 0 && offy == 0) + bench_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h); + } + } + } + } + } + + report("check_vvc_sad"); +} + void checkasm_check_vvc_mc(void) { + 
check_vvc_sad(); check_put_vvc_luma(); check_put_vvc_luma_uni(); check_put_vvc_chroma();
--
2.44.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".