From: Zhao Zhili <zhiliz...@tencent.com> sad_8x16_c: 0.8 ( 1.00x) sad_8x16_neon: 0.2 ( 3.00x) sad_16x8_c: 0.5 ( 1.00x) sad_16x8_neon: 0.2 ( 2.00x) sad_16x16_c: 1.5 ( 1.00x) sad_16x16_neon: 0.2 ( 6.00x) --- libavcodec/aarch64/vvc/Makefile | 1 + libavcodec/aarch64/vvc/dsp_init.c | 5 +++ libavcodec/aarch64/vvc/sad.S | 75 +++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 libavcodec/aarch64/vvc/sad.S
diff --git a/libavcodec/aarch64/vvc/Makefile b/libavcodec/aarch64/vvc/Makefile index a1c1f03e27..7ba13a2165 100644 --- a/libavcodec/aarch64/vvc/Makefile +++ b/libavcodec/aarch64/vvc/Makefile @@ -3,6 +3,7 @@ clean:: OBJS-$(CONFIG_VVC_DECODER) += aarch64/vvc/dsp_init.o NEON-OBJS-$(CONFIG_VVC_DECODER) += aarch64/vvc/alf.o \ + aarch64/vvc/sad.o \ aarch64/h26x/epel_neon.o \ aarch64/h26x/qpel_neon.o \ aarch64/h26x/sao_neon.o diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c index 934d918ffd..714d642634 100644 --- a/libavcodec/aarch64/vvc/dsp_init.c +++ b/libavcodec/aarch64/vvc/dsp_init.c @@ -39,6 +39,9 @@ #include "alf_template.c" #undef BIT_DEPTH +int ff_vvc_sad_neon(const int16_t *src0, const int16_t *src1, int dx, int dy, + const int block_w, const int block_h); + void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd) { int cpu_flags = av_get_cpu_flags(); @@ -125,4 +128,6 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd) c->alf.filter[LUMA] = alf_filter_luma_12_neon; c->alf.filter[CHROMA] = alf_filter_chroma_12_neon; } + + c->inter.sad = ff_vvc_sad_neon; } diff --git a/libavcodec/aarch64/vvc/sad.S b/libavcodec/aarch64/vvc/sad.S new file mode 100644 index 0000000000..beca876faf --- /dev/null +++ b/libavcodec/aarch64/vvc/sad.S @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2024 Zhao Zhili <quinkbl...@foxmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/aarch64/asm.S" + +#define VVC_MAX_PB_SIZE 128 +/* Sum of absolute differences of 16-bit samples; each loop pass steps 2 * VVC_MAX_PB_SIZE samples; result in w0. */ +function ff_vvc_sad_neon, export=1 + src0 .req x0 + src1 .req x1 + dx .req w2 + dy .req w3 + block_w .req w4 + block_h .req w5 + /* In 16-bit elements: src0 += dx + (dy << 7); src1 -= (dx - 4) + ((dy - 4) << 7). */ + sub w7, dx, #4 + sub w8, dy, #4 + add w6, dx, dy, lsl #7 + add w7, w7, w8, lsl #7 + sxtw x6, w6 + sxtw x7, w7 + add src0, src0, x6, lsl #1 + sub src1, src1, x7, lsl #1 + + cmp block_w, #16 + movi v16.4s, #0 + b.ge 2f +1: + // block_w == 8 + ldr q0, [src0] + ldr q2, [src1] + subs block_h, block_h, #2 + sabal v16.4s, v0.4h, v2.4h + sabal2 v16.4s, v0.8h, v2.8h + /* plain add leaves the flags from subs intact for the b.ne below */ + add src0, src0, #(2 * VVC_MAX_PB_SIZE * 2) + add src1, src1, #(2 * VVC_MAX_PB_SIZE * 2) + b.ne 1b + b 4f +2: + // block_w == 16, no block_w > 16 according to the spec + movi v17.4s, #0 +3: + ldp q0, q1, [src0], #(2 * VVC_MAX_PB_SIZE * 2) + ldp q2, q3, [src1], #(2 * VVC_MAX_PB_SIZE * 2) + subs block_h, block_h, #2 + sabal v16.4s, v0.4h, v2.4h + sabal2 v16.4s, v0.8h, v2.8h + sabal v17.4s, v1.4h, v3.4h + sabal2 v17.4s, v1.8h, v3.8h + /* b.ne still tests the flags from subs; the sabal ops do not write NZCV */ + b.ne 3b + add v16.4s, v16.4s, v17.4s +4: + addv s16, v16.4s + mov w0, v16.s[0] + ret +endfunc -- 2.42.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".