[FFmpeg-devel] [PATCH 8/9] sbcenc: add armv6 and neon asm optimizations
This was originally based on libsbc, and was fully integrated into ffmpeg. --- libavcodec/arm/Makefile | 3 + libavcodec/arm/sbcdsp_armv6.S| 245 ++ libavcodec/arm/sbcdsp_init_arm.c | 105 ++ libavcodec/arm/sbcdsp_neon.S | 714 +++ libavcodec/sbcdsp.c | 2 + libavcodec/sbcdsp.h | 1 + 6 files changed, 1070 insertions(+) create mode 100644 libavcodec/arm/sbcdsp_armv6.S create mode 100644 libavcodec/arm/sbcdsp_init_arm.c create mode 100644 libavcodec/arm/sbcdsp_neon.S diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 1eeac5449e..fd2401f4e5 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -42,6 +42,7 @@ OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_init_arm.o OBJS-$(CONFIG_HEVC_DECODER)+= arm/hevcdsp_init_arm.o OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_init_arm.o OBJS-$(CONFIG_RV40_DECODER)+= arm/rv40dsp_init_arm.o +OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_init_arm.o OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_init_arm.o OBJS-$(CONFIG_VP9_DECODER) += arm/vp9dsp_init_10bpp_arm.o \ @@ -81,6 +82,7 @@ ARMV6-OBJS-$(CONFIG_VP8DSP)+= arm/vp8_armv6.o \ # decoders/encoders ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o +ARMV6-OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_armv6.o # VFP optimizations @@ -140,6 +142,7 @@ NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o\ arm/rv40dsp_neon.o +NEON-OBJS-$(CONFIG_SBC_ENCODER)+= arm/sbcdsp_neon.o NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o NEON-OBJS-$(CONFIG_VP6_DECODER)+= arm/vp6dsp_neon.o NEON-OBJS-$(CONFIG_VP9_DECODER)+= arm/vp9itxfm_16bpp_neon.o \ diff --git a/libavcodec/arm/sbcdsp_armv6.S b/libavcodec/arm/sbcdsp_armv6.S new file mode 100644 index 00..f1ff845798 --- /dev/null +++ b/libavcodec/arm/sbcdsp_armv6.S @@ -0,0 +1,245 @@ +/* + * Bluetooth low-complexity, subband codec (SBC) + * + * Copyright (C) 2017 Aurelien Jacobs + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline. + */ + +#include "libavutil/arm/asm.S" + +function ff_sbc_analyze_4_armv6, export=1 +@ r0 = in, r1 = out, r2 = consts +push{r1, r3-r7, lr} +push{r8-r12, r14} +ldrdr4, r5, [r0, #0] +ldrdr6, r7, [r2, #0] +ldrdr8, r9, [r0, #16] +ldrdr10, r11, [r2, #16] +mov r14, #0x8000 +smlad r3, r4, r6, r14 +smlad r12, r5, r7, r14 +ldrdr4, r5, [r0, #32] +ldrdr6, r7, [r2, #32] +smlad r3, r8, r10, r3 +smlad r12, r9, r11, r12 +ldrdr8, r9, [r0, #48] +ldrdr10, r11, [r2, #48] +smlad r3, r4, r6, r3 +smlad r12, r5, r7, r12 +ldrdr4, r5, [r0, #64] +ldrdr6, r7, [r2, #64] +smlad r3, r8, r10, r3 +smlad r12, r9, r11, r12 +ldrdr8, r9, [r0, #8] +ldrdr10, r11, [r2, #8] +smlad r3, r4, r6, r3@ t1[0] is done +smlad r12, r5, r7, r12 @ t1[1] is done +ldrdr4, r5, [r0, #24] +ldrdr6, r7, [r2, #24] +pkhtb r3, r12, r3, asr #16@ combine t1[0] and t1[1] +smlad r12, r8, r10, r14 +smlad r14, r9, r11, r14 +ldrd
[FFmpeg-devel] [PATCH 8/9] sbcenc: add armv6 and neon asm optimizations
This was originally based on libsbc, and was fully integrated into ffmpeg. --- libavcodec/arm/Makefile | 3 + libavcodec/arm/sbcdsp_armv6.S| 245 ++ libavcodec/arm/sbcdsp_init_arm.c | 105 ++ libavcodec/arm/sbcdsp_neon.S | 714 +++ libavcodec/sbcdsp.c | 2 + libavcodec/sbcdsp.h | 1 + 6 files changed, 1070 insertions(+) create mode 100644 libavcodec/arm/sbcdsp_armv6.S create mode 100644 libavcodec/arm/sbcdsp_init_arm.c create mode 100644 libavcodec/arm/sbcdsp_neon.S diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 1eeac5449e..fd2401f4e5 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -42,6 +42,7 @@ OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_init_arm.o OBJS-$(CONFIG_HEVC_DECODER)+= arm/hevcdsp_init_arm.o OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_init_arm.o OBJS-$(CONFIG_RV40_DECODER)+= arm/rv40dsp_init_arm.o +OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_init_arm.o OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_init_arm.o OBJS-$(CONFIG_VP9_DECODER) += arm/vp9dsp_init_10bpp_arm.o \ @@ -81,6 +82,7 @@ ARMV6-OBJS-$(CONFIG_VP8DSP)+= arm/vp8_armv6.o \ # decoders/encoders ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o +ARMV6-OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_armv6.o # VFP optimizations @@ -140,6 +142,7 @@ NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o\ arm/rv40dsp_neon.o +NEON-OBJS-$(CONFIG_SBC_ENCODER)+= arm/sbcdsp_neon.o NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o NEON-OBJS-$(CONFIG_VP6_DECODER)+= arm/vp6dsp_neon.o NEON-OBJS-$(CONFIG_VP9_DECODER)+= arm/vp9itxfm_16bpp_neon.o \ diff --git a/libavcodec/arm/sbcdsp_armv6.S b/libavcodec/arm/sbcdsp_armv6.S new file mode 100644 index 00..f1ff845798 --- /dev/null +++ b/libavcodec/arm/sbcdsp_armv6.S @@ -0,0 +1,245 @@ +/* + * Bluetooth low-complexity, subband codec (SBC) + * + * Copyright (C) 2017 Aurelien Jacobs + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline. + */ + +#include "libavutil/arm/asm.S" + +function ff_sbc_analyze_4_armv6, export=1 +@ r0 = in, r1 = out, r2 = consts +push{r1, r3-r7, lr} +push{r8-r12, r14} +ldrdr4, r5, [r0, #0] +ldrdr6, r7, [r2, #0] +ldrdr8, r9, [r0, #16] +ldrdr10, r11, [r2, #16] +mov r14, #0x8000 +smlad r3, r4, r6, r14 +smlad r12, r5, r7, r14 +ldrdr4, r5, [r0, #32] +ldrdr6, r7, [r2, #32] +smlad r3, r8, r10, r3 +smlad r12, r9, r11, r12 +ldrdr8, r9, [r0, #48] +ldrdr10, r11, [r2, #48] +smlad r3, r4, r6, r3 +smlad r12, r5, r7, r12 +ldrdr4, r5, [r0, #64] +ldrdr6, r7, [r2, #64] +smlad r3, r8, r10, r3 +smlad r12, r9, r11, r12 +ldrdr8, r9, [r0, #8] +ldrdr10, r11, [r2, #8] +smlad r3, r4, r6, r3@ t1[0] is done +smlad r12, r5, r7, r12 @ t1[1] is done +ldrdr4, r5, [r0, #24] +ldrdr6, r7, [r2, #24] +pkhtb r3, r12, r3, asr #16@ combine t1[0] and t1[1] +smlad r12, r8, r10, r14 +smlad r14, r9, r11, r14 +ldrd