Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations
Hi, On Wed, Dec 20, 2017 at 4:58 PM, James Almer wrote: > On 12/17/2017 6:47 PM, Aurelien Jacobs wrote: > > +;*** > > +;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8], > > +; uint32_t scale_factor[2][8], > > +; int blocks, int channels, int subbands) > > +;*** > > +INIT_MMX mmx > > +cglobal sbc_calc_scalefactors, 5, 7, 3, sb_sample_f, scale_factor, > blocks, channels, subbands, ptr, blk > > On x86_64 (Windows at least), the high 32 bits of registers storing int > arguments may contain garbage, so you need to work around it. I think that's only for stack arguments, i.e. only subbands (not blocks or channels). Ronald ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations
On 12/17/2017 6:47 PM, Aurelien Jacobs wrote: > This was originally based on libsbc, and was fully integrated into ffmpeg. > --- > libavcodec/sbcdsp.c | 3 + > libavcodec/sbcdsp.h | 2 + > libavcodec/x86/Makefile | 2 + > libavcodec/x86/sbcdsp.asm| 284 > +++ > libavcodec/x86/sbcdsp_init.c | 51 > 5 files changed, 342 insertions(+) > create mode 100644 libavcodec/x86/sbcdsp.asm > create mode 100644 libavcodec/x86/sbcdsp_init.c > > diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c > index 16faf5ba9b..9bb60cdd5e 100644 > --- a/libavcodec/sbcdsp.c > +++ b/libavcodec/sbcdsp.c > @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s) > /* Default implementation for scale factors calculation */ > s->sbc_calc_scalefactors = sbc_calc_scalefactors; > s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; > + > +if (ARCH_X86) > +ff_sbcdsp_init_x86(s); > } > diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h > index 66ed7d324e..127e6a8a11 100644 > --- a/libavcodec/sbcdsp.h > +++ b/libavcodec/sbcdsp.h > @@ -80,4 +80,6 @@ struct sbc_dsp_context { > */ > void ff_sbcdsp_init(SBCDSPContext *s); > > +void ff_sbcdsp_init_x86(SBCDSPContext *s); > + > #endif /* AVCODEC_SBCDSP_H */ > diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile > index a805cd37b4..2350c8bbee 100644 > --- a/libavcodec/x86/Makefile > +++ b/libavcodec/x86/Makefile > @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o > OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o > OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o > OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o > +OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o > OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o > OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o > OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o > @@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o > X86ASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o > X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o > 
X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o > +X86ASM-OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp.o > X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o > X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o > X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o > diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm > new file mode 100644 > index 00..00b48a821b > --- /dev/null > +++ b/libavcodec/x86/sbcdsp.asm > @@ -0,0 +1,284 @@ > +;** > +;* SIMD optimized SBC encoder DSP functions > +;* > +;* Copyright (C) 2017 Aurelien Jacobs> +;* Copyright (C) 2008-2010 Nokia Corporation > +;* Copyright (C) 2004-2010 Marcel Holtmann > +;* Copyright (C) 2004-2005 Henryk Ploetz > +;* Copyright (C) 2005-2006 Brad Midgley > +;* > +;* This file is part of FFmpeg. > +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. 
> +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > +;** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION_RODATA > + > +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1) > + > +SECTION .text > + > +;*** > +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t > *consts); > +;*** > +INIT_MMX mmx > +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts > +movq m0, [inq] > +movq m1, [inq+8] > +pmaddwd m0, [constsq] > +pmaddwd m1, [constsq+8] > +paddd m0, [scale_mask] > +paddd m1, [scale_mask] > + > +movq m2, [inq+16] > +movq m3, [inq+24] > +pmaddwd m2, [constsq+16] > +pmaddwd m3, [constsq+24] > +paddd m0, m2 > +paddd m1, m3 > + > +movq m2, [inq+32] > +movq m3, [inq+40] >
Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations
On 12/20/2017 5:06 PM, Aurelien Jacobs wrote: > On Wed, Dec 20, 2017 at 03:47:35PM -0300, James Almer wrote: >> On 12/17/2017 6:47 PM, Aurelien Jacobs wrote: >>> This was originally based on libsbc, and was fully integrated into ffmpeg. >>> --- >>> libavcodec/sbcdsp.c | 3 + >>> libavcodec/sbcdsp.h | 2 + >>> libavcodec/x86/Makefile | 2 + >>> libavcodec/x86/sbcdsp.asm| 284 >>> +++ >>> libavcodec/x86/sbcdsp_init.c | 51 >>> 5 files changed, 342 insertions(+) >>> create mode 100644 libavcodec/x86/sbcdsp.asm >>> create mode 100644 libavcodec/x86/sbcdsp_init.c >>> >>> diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c >>> index 16faf5ba9b..9bb60cdd5e 100644 >>> --- a/libavcodec/sbcdsp.c >>> +++ b/libavcodec/sbcdsp.c >>> @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s) >>> /* Default implementation for scale factors calculation */ >>> s->sbc_calc_scalefactors = sbc_calc_scalefactors; >>> s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; >>> + >>> +if (ARCH_X86) >>> +ff_sbcdsp_init_x86(s); >>> } >>> diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h >>> index 66ed7d324e..127e6a8a11 100644 >>> --- a/libavcodec/sbcdsp.h >>> +++ b/libavcodec/sbcdsp.h >>> @@ -80,4 +80,6 @@ struct sbc_dsp_context { >>> */ >>> void ff_sbcdsp_init(SBCDSPContext *s); >>> >>> +void ff_sbcdsp_init_x86(SBCDSPContext *s); >>> + >>> #endif /* AVCODEC_SBCDSP_H */ >>> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile >>> index a805cd37b4..2350c8bbee 100644 >>> --- a/libavcodec/x86/Makefile >>> +++ b/libavcodec/x86/Makefile >>> @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += >>> x86/pngdsp_init.o >>> OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o >>> OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o >>> OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o >>> +OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o >>> OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o >>> OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o >>> OBJS-$(CONFIG_TRUEHD_DECODER) += 
x86/mlpdsp_init.o >>> @@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o >>> X86ASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o >>> X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o >>> X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o >>> +X86ASM-OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp.o >>> X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o >>> X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o >>> X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o >>> diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm >>> new file mode 100644 >>> index 00..00b48a821b >>> --- /dev/null >>> +++ b/libavcodec/x86/sbcdsp.asm >>> @@ -0,0 +1,284 @@ >>> +;** >>> +;* SIMD optimized SBC encoder DSP functions >>> +;* >>> +;* Copyright (C) 2017 Aurelien Jacobs>>> +;* Copyright (C) 2008-2010 Nokia Corporation >>> +;* Copyright (C) 2004-2010 Marcel Holtmann >>> +;* Copyright (C) 2004-2005 Henryk Ploetz >>> +;* Copyright (C) 2005-2006 Brad Midgley >>> +;* >>> +;* This file is part of FFmpeg. >>> +;* >>> +;* FFmpeg is free software; you can redistribute it and/or >>> +;* modify it under the terms of the GNU Lesser General Public >>> +;* License as published by the Free Software Foundation; either >>> +;* version 2.1 of the License, or (at your option) any later version. >>> +;* >>> +;* FFmpeg is distributed in the hope that it will be useful, >>> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of >>> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> +;* Lesser General Public License for more details. 
>>> +;* >>> +;* You should have received a copy of the GNU Lesser General Public >>> +;* License along with FFmpeg; if not, write to the Free Software >>> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA >>> 02110-1301 USA >>> +;** >>> + >>> +%include "libavutil/x86/x86util.asm" >>> + >>> +SECTION_RODATA >>> + >>> +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1) >>> + >>> +SECTION .text >>> + >>> +;*** >>> +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t >>> *consts); >>> +;*** >>> +INIT_MMX mmx >>> +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts >>> +movq m0, [inq] >>> +movq m1, [inq+8] >>> +pmaddwd m0, [constsq] >>> +pmaddwd m1, [constsq+8] >>> +paddd m0,
Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations
On Wed, Dec 20, 2017 at 03:47:35PM -0300, James Almer wrote: > On 12/17/2017 6:47 PM, Aurelien Jacobs wrote: > > This was originally based on libsbc, and was fully integrated into ffmpeg. > > --- > > libavcodec/sbcdsp.c | 3 + > > libavcodec/sbcdsp.h | 2 + > > libavcodec/x86/Makefile | 2 + > > libavcodec/x86/sbcdsp.asm| 284 > > +++ > > libavcodec/x86/sbcdsp_init.c | 51 > > 5 files changed, 342 insertions(+) > > create mode 100644 libavcodec/x86/sbcdsp.asm > > create mode 100644 libavcodec/x86/sbcdsp_init.c > > > > diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c > > index 16faf5ba9b..9bb60cdd5e 100644 > > --- a/libavcodec/sbcdsp.c > > +++ b/libavcodec/sbcdsp.c > > @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s) > > /* Default implementation for scale factors calculation */ > > s->sbc_calc_scalefactors = sbc_calc_scalefactors; > > s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; > > + > > +if (ARCH_X86) > > +ff_sbcdsp_init_x86(s); > > } > > diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h > > index 66ed7d324e..127e6a8a11 100644 > > --- a/libavcodec/sbcdsp.h > > +++ b/libavcodec/sbcdsp.h > > @@ -80,4 +80,6 @@ struct sbc_dsp_context { > > */ > > void ff_sbcdsp_init(SBCDSPContext *s); > > > > +void ff_sbcdsp_init_x86(SBCDSPContext *s); > > + > > #endif /* AVCODEC_SBCDSP_H */ > > diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile > > index a805cd37b4..2350c8bbee 100644 > > --- a/libavcodec/x86/Makefile > > +++ b/libavcodec/x86/Makefile > > @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += > > x86/pngdsp_init.o > > OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o > > OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o > > OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o > > +OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o > > OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o > > OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o > > OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o > > @@ -172,6 +173,7 @@ 
X86ASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o > > X86ASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o > > X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o > > X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o > > +X86ASM-OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp.o > > X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o > > X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o > > X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o > > diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm > > new file mode 100644 > > index 00..00b48a821b > > --- /dev/null > > +++ b/libavcodec/x86/sbcdsp.asm > > @@ -0,0 +1,284 @@ > > +;** > > +;* SIMD optimized SBC encoder DSP functions > > +;* > > +;* Copyright (C) 2017 Aurelien Jacobs> > +;* Copyright (C) 2008-2010 Nokia Corporation > > +;* Copyright (C) 2004-2010 Marcel Holtmann > > +;* Copyright (C) 2004-2005 Henryk Ploetz > > +;* Copyright (C) 2005-2006 Brad Midgley > > +;* > > +;* This file is part of FFmpeg. > > +;* > > +;* FFmpeg is free software; you can redistribute it and/or > > +;* modify it under the terms of the GNU Lesser General Public > > +;* License as published by the Free Software Foundation; either > > +;* version 2.1 of the License, or (at your option) any later version. > > +;* > > +;* FFmpeg is distributed in the hope that it will be useful, > > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > +;* Lesser General Public License for more details. 
> > +;* > > +;* You should have received a copy of the GNU Lesser General Public > > +;* License along with FFmpeg; if not, write to the Free Software > > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > > 02110-1301 USA > > +;** > > + > > +%include "libavutil/x86/x86util.asm" > > + > > +SECTION_RODATA > > + > > +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1) > > + > > +SECTION .text > > + > > +;*** > > +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t > > *consts); > > +;*** > > +INIT_MMX mmx > > +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts > > +movq m0, [inq] > > +movq m1, [inq+8] > > +pmaddwd m0, [constsq] > > +pmaddwd m1, [constsq+8] > > +paddd m0, [scale_mask] > > +paddd m1,
Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations
On 12/17/2017 6:47 PM, Aurelien Jacobs wrote: > This was originally based on libsbc, and was fully integrated into ffmpeg. > --- > libavcodec/sbcdsp.c | 3 + > libavcodec/sbcdsp.h | 2 + > libavcodec/x86/Makefile | 2 + > libavcodec/x86/sbcdsp.asm| 284 > +++ > libavcodec/x86/sbcdsp_init.c | 51 > 5 files changed, 342 insertions(+) > create mode 100644 libavcodec/x86/sbcdsp.asm > create mode 100644 libavcodec/x86/sbcdsp_init.c > > diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c > index 16faf5ba9b..9bb60cdd5e 100644 > --- a/libavcodec/sbcdsp.c > +++ b/libavcodec/sbcdsp.c > @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s) > /* Default implementation for scale factors calculation */ > s->sbc_calc_scalefactors = sbc_calc_scalefactors; > s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; > + > +if (ARCH_X86) > +ff_sbcdsp_init_x86(s); > } > diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h > index 66ed7d324e..127e6a8a11 100644 > --- a/libavcodec/sbcdsp.h > +++ b/libavcodec/sbcdsp.h > @@ -80,4 +80,6 @@ struct sbc_dsp_context { > */ > void ff_sbcdsp_init(SBCDSPContext *s); > > +void ff_sbcdsp_init_x86(SBCDSPContext *s); > + > #endif /* AVCODEC_SBCDSP_H */ > diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile > index a805cd37b4..2350c8bbee 100644 > --- a/libavcodec/x86/Makefile > +++ b/libavcodec/x86/Makefile > @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o > OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o > OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o > OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o > +OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o > OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o > OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o > OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o > @@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o > X86ASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o > X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o > 
X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o > +X86ASM-OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp.o > X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o > X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o > X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o > diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm > new file mode 100644 > index 00..00b48a821b > --- /dev/null > +++ b/libavcodec/x86/sbcdsp.asm > @@ -0,0 +1,284 @@ > +;** > +;* SIMD optimized SBC encoder DSP functions > +;* > +;* Copyright (C) 2017 Aurelien Jacobs> +;* Copyright (C) 2008-2010 Nokia Corporation > +;* Copyright (C) 2004-2010 Marcel Holtmann > +;* Copyright (C) 2004-2005 Henryk Ploetz > +;* Copyright (C) 2005-2006 Brad Midgley > +;* > +;* This file is part of FFmpeg. > +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. 
> +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > +;** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION_RODATA > + > +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1) > + > +SECTION .text > + > +;*** > +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t > *consts); > +;*** > +INIT_MMX mmx > +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts > +movq m0, [inq] > +movq m1, [inq+8] > +pmaddwd m0, [constsq] > +pmaddwd m1, [constsq+8] > +paddd m0, [scale_mask] > +paddd m1, [scale_mask] > + > +movq m2, [inq+16] > +movq m3, [inq+24] > +pmaddwd m2, [constsq+16] > +pmaddwd m3, [constsq+24] > +paddd m0, m2 > +paddd m1, m3 > + > +movq m2, [inq+32] > +movq m3, [inq+40] >
Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations
On Mon, Dec 18, 2017 at 12:21:00AM +0100, Carl Eugen Hoyos wrote: > 2017-12-17 22:47 GMT+01:00 Aurelien Jacobs: > > This was originally based on libsbc, and was fully integrated into ffmpeg. > > Very rough numbers are useful in the commit message. Here you go. From 2227f8d8302fe37da00f34efacdf6b4941220330 Mon Sep 17 00:00:00 2001 From: Aurelien Jacobs Date: Sun, 17 Dec 2017 20:07:33 +0100 Subject: [PATCH 6/8] sbcenc: add MMX optimizations This was originally based on libsbc, and was fully integrated into ffmpeg. Rough speed test: C version: speed= 592x MMX version: speed= 785x --- libavcodec/sbcdsp.c | 3 + libavcodec/sbcdsp.h | 2 + libavcodec/x86/Makefile | 2 + libavcodec/x86/sbcdsp.asm| 284 +++ libavcodec/x86/sbcdsp_init.c | 51 5 files changed, 342 insertions(+) create mode 100644 libavcodec/x86/sbcdsp.asm create mode 100644 libavcodec/x86/sbcdsp_init.c diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c index 16faf5ba9b..9bb60cdd5e 100644 --- a/libavcodec/sbcdsp.c +++ b/libavcodec/sbcdsp.c @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s) /* Default implementation for scale factors calculation */ s->sbc_calc_scalefactors = sbc_calc_scalefactors; s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; + +if (ARCH_X86) +ff_sbcdsp_init_x86(s); } diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h index 66ed7d324e..127e6a8a11 100644 --- a/libavcodec/sbcdsp.h +++ b/libavcodec/sbcdsp.h @@ -80,4 +80,6 @@ struct sbc_dsp_context { */ void ff_sbcdsp_init(SBCDSPContext *s); +void ff_sbcdsp_init_x86(SBCDSPContext *s); + #endif /* AVCODEC_SBCDSP_H */ diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index a805cd37b4..2350c8bbee 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o +OBJS-$(CONFIG_SBC_ENCODER) 
+= x86/sbcdsp_init.o OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o @@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o X86ASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o +X86ASM-OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp.o X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm new file mode 100644 index 00..00b48a821b --- /dev/null +++ b/libavcodec/x86/sbcdsp.asm @@ -0,0 +1,284 @@ +;** +;* SIMD optimized SBC encoder DSP functions +;* +;* Copyright (C) 2017 Aurelien Jacobs +;* Copyright (C) 2008-2010 Nokia Corporation +;* Copyright (C) 2004-2010 Marcel Holtmann +;* Copyright (C) 2004-2005 Henryk Ploetz +;* Copyright (C) 2005-2006 Brad Midgley +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1) + +SECTION .text + +;*** +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts); +;*** +INIT_MMX mmx +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts +movq m0, [inq] +movq m1, [inq+8] +pmaddwd m0, [constsq] +pmaddwd m1, [constsq+8] +paddd m0, [scale_mask] +paddd m1,
Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations
2017-12-17 22:47 GMT+01:00 Aurelien Jacobs: > This was originally based on libsbc, and was fully integrated into ffmpeg. Very rough numbers are useful in the commit message. Carl Eugen ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel