Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations

2017-12-20 Thread Ronald S. Bultje
Hi,

On Wed, Dec 20, 2017 at 4:58 PM, James Almer  wrote:

> On 12/17/2017 6:47 PM, Aurelien Jacobs wrote:
> > +;***
> > +;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
> > +;  uint32_t scale_factor[2][8],
> > +;  int blocks, int channels, int subbands)
> > +;***
> > +INIT_MMX mmx
> > +cglobal sbc_calc_scalefactors, 5, 7, 3, sb_sample_f, scale_factor, blocks, channels, subbands, ptr, blk
>
> On x86_64 (Windows at least), the high 32 bits of registers storing int
> arguments may contain garbage, so you need to work around it.


I think that's only for stack arguments, i.e. only subbands (not blocks or
channels).

Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations

2017-12-20 Thread James Almer
On 12/17/2017 6:47 PM, Aurelien Jacobs wrote:
> This was originally based on libsbc, and was fully integrated into ffmpeg.
> ---
>  libavcodec/sbcdsp.c  |   3 +
>  libavcodec/sbcdsp.h  |   2 +
>  libavcodec/x86/Makefile  |   2 +
>  libavcodec/x86/sbcdsp.asm| 284 
> +++
>  libavcodec/x86/sbcdsp_init.c |  51 
>  5 files changed, 342 insertions(+)
>  create mode 100644 libavcodec/x86/sbcdsp.asm
>  create mode 100644 libavcodec/x86/sbcdsp_init.c
> 
> diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
> index 16faf5ba9b..9bb60cdd5e 100644
> --- a/libavcodec/sbcdsp.c
> +++ b/libavcodec/sbcdsp.c
> @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s)
>  /* Default implementation for scale factors calculation */
>  s->sbc_calc_scalefactors = sbc_calc_scalefactors;
>  s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
> +
> +if (ARCH_X86)
> +ff_sbcdsp_init_x86(s);
>  }
> diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
> index 66ed7d324e..127e6a8a11 100644
> --- a/libavcodec/sbcdsp.h
> +++ b/libavcodec/sbcdsp.h
> @@ -80,4 +80,6 @@ struct sbc_dsp_context {
>   */
>  void ff_sbcdsp_init(SBCDSPContext *s);
>  
> +void ff_sbcdsp_init_x86(SBCDSPContext *s);
> +
>  #endif /* AVCODEC_SBCDSP_H */
> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> index a805cd37b4..2350c8bbee 100644
> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
>  OBJS-$(CONFIG_PRORES_DECODER)  += x86/proresdsp_init.o
>  OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
>  OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o
> +OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o
>  OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o
>  OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
>  OBJS-$(CONFIG_TRUEHD_DECODER)  += x86/mlpdsp_init.o
> @@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER)  += x86/pngdsp.o
>  X86ASM-OBJS-$(CONFIG_PRORES_DECODER)   += x86/proresdsp.o
>  X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
>  X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
> +X86ASM-OBJS-$(CONFIG_SBC_ENCODER)  += x86/sbcdsp.o
>  X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
>  X86ASM-OBJS-$(CONFIG_TAK_DECODER)  += x86/takdsp.o
>  X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER)   += x86/mlpdsp.o
> diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
> new file mode 100644
> index 00..00b48a821b
> --- /dev/null
> +++ b/libavcodec/x86/sbcdsp.asm
> @@ -0,0 +1,284 @@
> +;**
> +;* SIMD optimized SBC encoder DSP functions
> +;*
> +;* Copyright (C) 2017  Aurelien Jacobs 
> +;* Copyright (C) 2008-2010  Nokia Corporation
> +;* Copyright (C) 2004-2010  Marcel Holtmann 
> +;* Copyright (C) 2004-2005  Henryk Ploetz 
> +;* Copyright (C) 2005-2006  Brad Midgley 
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with FFmpeg; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> +;**
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +SECTION_RODATA
> +
> +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1)
> +
> +SECTION .text
> +
> +;***
> +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
> +;***
> +INIT_MMX mmx
> +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
> +movq  m0, [inq]
> +movq  m1, [inq+8]
> +pmaddwd   m0, [constsq]
> +pmaddwd   m1, [constsq+8]
> +paddd m0, [scale_mask]
> +paddd m1, [scale_mask]
> +
> +movq  m2, [inq+16]
> +movq  m3, [inq+24]
> +pmaddwd   m2, [constsq+16]
> +pmaddwd   m3, [constsq+24]
> +paddd m0, m2
> +paddd m1, m3
> +
> +movq  m2, [inq+32]
> +movq  m3, [inq+40]
> 

Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations

2017-12-20 Thread James Almer
On 12/20/2017 5:06 PM, Aurelien Jacobs wrote:
> On Wed, Dec 20, 2017 at 03:47:35PM -0300, James Almer wrote:
>> On 12/17/2017 6:47 PM, Aurelien Jacobs wrote:
>>> This was originally based on libsbc, and was fully integrated into ffmpeg.
>>> ---
>>>  libavcodec/sbcdsp.c  |   3 +
>>>  libavcodec/sbcdsp.h  |   2 +
>>>  libavcodec/x86/Makefile  |   2 +
>>>  libavcodec/x86/sbcdsp.asm| 284 
>>> +++
>>>  libavcodec/x86/sbcdsp_init.c |  51 
>>>  5 files changed, 342 insertions(+)
>>>  create mode 100644 libavcodec/x86/sbcdsp.asm
>>>  create mode 100644 libavcodec/x86/sbcdsp_init.c
>>>
>>> diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
>>> index 16faf5ba9b..9bb60cdd5e 100644
>>> --- a/libavcodec/sbcdsp.c
>>> +++ b/libavcodec/sbcdsp.c
>>> @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s)
>>>  /* Default implementation for scale factors calculation */
>>>  s->sbc_calc_scalefactors = sbc_calc_scalefactors;
>>>  s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
>>> +
>>> +if (ARCH_X86)
>>> +ff_sbcdsp_init_x86(s);
>>>  }
>>> diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
>>> index 66ed7d324e..127e6a8a11 100644
>>> --- a/libavcodec/sbcdsp.h
>>> +++ b/libavcodec/sbcdsp.h
>>> @@ -80,4 +80,6 @@ struct sbc_dsp_context {
>>>   */
>>>  void ff_sbcdsp_init(SBCDSPContext *s);
>>>  
>>> +void ff_sbcdsp_init_x86(SBCDSPContext *s);
>>> +
>>>  #endif /* AVCODEC_SBCDSP_H */
>>> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
>>> index a805cd37b4..2350c8bbee 100644
>>> --- a/libavcodec/x86/Makefile
>>> +++ b/libavcodec/x86/Makefile
>>> @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += 
>>> x86/pngdsp_init.o
>>>  OBJS-$(CONFIG_PRORES_DECODER)  += x86/proresdsp_init.o
>>>  OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
>>>  OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o
>>> +OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o
>>>  OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o
>>>  OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
>>>  OBJS-$(CONFIG_TRUEHD_DECODER)  += x86/mlpdsp_init.o
>>> @@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER)  += x86/pngdsp.o
>>>  X86ASM-OBJS-$(CONFIG_PRORES_DECODER)   += x86/proresdsp.o
>>>  X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
>>>  X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
>>> +X86ASM-OBJS-$(CONFIG_SBC_ENCODER)  += x86/sbcdsp.o
>>>  X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
>>>  X86ASM-OBJS-$(CONFIG_TAK_DECODER)  += x86/takdsp.o
>>>  X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER)   += x86/mlpdsp.o
>>> diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
>>> new file mode 100644
>>> index 00..00b48a821b
>>> --- /dev/null
>>> +++ b/libavcodec/x86/sbcdsp.asm
>>> @@ -0,0 +1,284 @@
>>> +;**
>>> +;* SIMD optimized SBC encoder DSP functions
>>> +;*
>>> +;* Copyright (C) 2017  Aurelien Jacobs 
>>> +;* Copyright (C) 2008-2010  Nokia Corporation
>>> +;* Copyright (C) 2004-2010  Marcel Holtmann 
>>> +;* Copyright (C) 2004-2005  Henryk Ploetz 
>>> +;* Copyright (C) 2005-2006  Brad Midgley 
>>> +;*
>>> +;* This file is part of FFmpeg.
>>> +;*
>>> +;* FFmpeg is free software; you can redistribute it and/or
>>> +;* modify it under the terms of the GNU Lesser General Public
>>> +;* License as published by the Free Software Foundation; either
>>> +;* version 2.1 of the License, or (at your option) any later version.
>>> +;*
>>> +;* FFmpeg is distributed in the hope that it will be useful,
>>> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +;* Lesser General Public License for more details.
>>> +;*
>>> +;* You should have received a copy of the GNU Lesser General Public
>>> +;* License along with FFmpeg; if not, write to the Free Software
>>> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
>>> 02110-1301 USA
>>> +;**
>>> +
>>> +%include "libavutil/x86/x86util.asm"
>>> +
>>> +SECTION_RODATA
>>> +
>>> +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1)
>>> +
>>> +SECTION .text
>>> +
>>> +;***
>>> +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
>>> +;***
>>> +INIT_MMX mmx
>>> +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
>>> +movq  m0, [inq]
>>> +movq  m1, [inq+8]
>>> +pmaddwd   m0, [constsq]
>>> +pmaddwd   m1, [constsq+8]
>>> +paddd m0, 

Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations

2017-12-20 Thread Aurelien Jacobs
On Wed, Dec 20, 2017 at 03:47:35PM -0300, James Almer wrote:
> On 12/17/2017 6:47 PM, Aurelien Jacobs wrote:
> > This was originally based on libsbc, and was fully integrated into ffmpeg.
> > ---
> >  libavcodec/sbcdsp.c  |   3 +
> >  libavcodec/sbcdsp.h  |   2 +
> >  libavcodec/x86/Makefile  |   2 +
> >  libavcodec/x86/sbcdsp.asm| 284 
> > +++
> >  libavcodec/x86/sbcdsp_init.c |  51 
> >  5 files changed, 342 insertions(+)
> >  create mode 100644 libavcodec/x86/sbcdsp.asm
> >  create mode 100644 libavcodec/x86/sbcdsp_init.c
> > 
> > diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
> > index 16faf5ba9b..9bb60cdd5e 100644
> > --- a/libavcodec/sbcdsp.c
> > +++ b/libavcodec/sbcdsp.c
> > @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s)
> >  /* Default implementation for scale factors calculation */
> >  s->sbc_calc_scalefactors = sbc_calc_scalefactors;
> >  s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
> > +
> > +if (ARCH_X86)
> > +ff_sbcdsp_init_x86(s);
> >  }
> > diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
> > index 66ed7d324e..127e6a8a11 100644
> > --- a/libavcodec/sbcdsp.h
> > +++ b/libavcodec/sbcdsp.h
> > @@ -80,4 +80,6 @@ struct sbc_dsp_context {
> >   */
> >  void ff_sbcdsp_init(SBCDSPContext *s);
> >  
> > +void ff_sbcdsp_init_x86(SBCDSPContext *s);
> > +
> >  #endif /* AVCODEC_SBCDSP_H */
> > diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> > index a805cd37b4..2350c8bbee 100644
> > --- a/libavcodec/x86/Makefile
> > +++ b/libavcodec/x86/Makefile
> > @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += 
> > x86/pngdsp_init.o
> >  OBJS-$(CONFIG_PRORES_DECODER)  += x86/proresdsp_init.o
> >  OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
> >  OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o
> > +OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o
> >  OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o
> >  OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
> >  OBJS-$(CONFIG_TRUEHD_DECODER)  += x86/mlpdsp_init.o
> > @@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER)  += x86/pngdsp.o
> >  X86ASM-OBJS-$(CONFIG_PRORES_DECODER)   += x86/proresdsp.o
> >  X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
> >  X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
> > +X86ASM-OBJS-$(CONFIG_SBC_ENCODER)  += x86/sbcdsp.o
> >  X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
> >  X86ASM-OBJS-$(CONFIG_TAK_DECODER)  += x86/takdsp.o
> >  X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER)   += x86/mlpdsp.o
> > diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
> > new file mode 100644
> > index 00..00b48a821b
> > --- /dev/null
> > +++ b/libavcodec/x86/sbcdsp.asm
> > @@ -0,0 +1,284 @@
> > +;**
> > +;* SIMD optimized SBC encoder DSP functions
> > +;*
> > +;* Copyright (C) 2017  Aurelien Jacobs 
> > +;* Copyright (C) 2008-2010  Nokia Corporation
> > +;* Copyright (C) 2004-2010  Marcel Holtmann 
> > +;* Copyright (C) 2004-2005  Henryk Ploetz 
> > +;* Copyright (C) 2005-2006  Brad Midgley 
> > +;*
> > +;* This file is part of FFmpeg.
> > +;*
> > +;* FFmpeg is free software; you can redistribute it and/or
> > +;* modify it under the terms of the GNU Lesser General Public
> > +;* License as published by the Free Software Foundation; either
> > +;* version 2.1 of the License, or (at your option) any later version.
> > +;*
> > +;* FFmpeg is distributed in the hope that it will be useful,
> > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +;* Lesser General Public License for more details.
> > +;*
> > +;* You should have received a copy of the GNU Lesser General Public
> > +;* License along with FFmpeg; if not, write to the Free Software
> > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
> > 02110-1301 USA
> > +;**
> > +
> > +%include "libavutil/x86/x86util.asm"
> > +
> > +SECTION_RODATA
> > +
> > +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1)
> > +
> > +SECTION .text
> > +
> > +;***
> > +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
> > +;***
> > +INIT_MMX mmx
> > +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
> > +movq  m0, [inq]
> > +movq  m1, [inq+8]
> > +pmaddwd   m0, [constsq]
> > +pmaddwd   m1, [constsq+8]
> > +paddd m0, [scale_mask]
> > +paddd m1, 

Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations

2017-12-20 Thread James Almer
On 12/17/2017 6:47 PM, Aurelien Jacobs wrote:
> This was originally based on libsbc, and was fully integrated into ffmpeg.
> ---
>  libavcodec/sbcdsp.c  |   3 +
>  libavcodec/sbcdsp.h  |   2 +
>  libavcodec/x86/Makefile  |   2 +
>  libavcodec/x86/sbcdsp.asm| 284 
> +++
>  libavcodec/x86/sbcdsp_init.c |  51 
>  5 files changed, 342 insertions(+)
>  create mode 100644 libavcodec/x86/sbcdsp.asm
>  create mode 100644 libavcodec/x86/sbcdsp_init.c
> 
> diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
> index 16faf5ba9b..9bb60cdd5e 100644
> --- a/libavcodec/sbcdsp.c
> +++ b/libavcodec/sbcdsp.c
> @@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s)
>  /* Default implementation for scale factors calculation */
>  s->sbc_calc_scalefactors = sbc_calc_scalefactors;
>  s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
> +
> +if (ARCH_X86)
> +ff_sbcdsp_init_x86(s);
>  }
> diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
> index 66ed7d324e..127e6a8a11 100644
> --- a/libavcodec/sbcdsp.h
> +++ b/libavcodec/sbcdsp.h
> @@ -80,4 +80,6 @@ struct sbc_dsp_context {
>   */
>  void ff_sbcdsp_init(SBCDSPContext *s);
>  
> +void ff_sbcdsp_init_x86(SBCDSPContext *s);
> +
>  #endif /* AVCODEC_SBCDSP_H */
> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> index a805cd37b4..2350c8bbee 100644
> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
>  OBJS-$(CONFIG_PRORES_DECODER)  += x86/proresdsp_init.o
>  OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
>  OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o
> +OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o
>  OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o
>  OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
>  OBJS-$(CONFIG_TRUEHD_DECODER)  += x86/mlpdsp_init.o
> @@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER)  += x86/pngdsp.o
>  X86ASM-OBJS-$(CONFIG_PRORES_DECODER)   += x86/proresdsp.o
>  X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
>  X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
> +X86ASM-OBJS-$(CONFIG_SBC_ENCODER)  += x86/sbcdsp.o
>  X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
>  X86ASM-OBJS-$(CONFIG_TAK_DECODER)  += x86/takdsp.o
>  X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER)   += x86/mlpdsp.o
> diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
> new file mode 100644
> index 00..00b48a821b
> --- /dev/null
> +++ b/libavcodec/x86/sbcdsp.asm
> @@ -0,0 +1,284 @@
> +;**
> +;* SIMD optimized SBC encoder DSP functions
> +;*
> +;* Copyright (C) 2017  Aurelien Jacobs 
> +;* Copyright (C) 2008-2010  Nokia Corporation
> +;* Copyright (C) 2004-2010  Marcel Holtmann 
> +;* Copyright (C) 2004-2005  Henryk Ploetz 
> +;* Copyright (C) 2005-2006  Brad Midgley 
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with FFmpeg; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> +;**
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +SECTION_RODATA
> +
> +scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1)
> +
> +SECTION .text
> +
> +;***
> +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
> +;***
> +INIT_MMX mmx
> +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
> +movq  m0, [inq]
> +movq  m1, [inq+8]
> +pmaddwd   m0, [constsq]
> +pmaddwd   m1, [constsq+8]
> +paddd m0, [scale_mask]
> +paddd m1, [scale_mask]
> +
> +movq  m2, [inq+16]
> +movq  m3, [inq+24]
> +pmaddwd   m2, [constsq+16]
> +pmaddwd   m3, [constsq+24]
> +paddd m0, m2
> +paddd m1, m3
> +
> +movq  m2, [inq+32]
> +movq  m3, [inq+40]
> 

Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations

2017-12-17 Thread Aurelien Jacobs
On Mon, Dec 18, 2017 at 12:21:00AM +0100, Carl Eugen Hoyos wrote:
> 2017-12-17 22:47 GMT+01:00 Aurelien Jacobs :
> > This was originally based on libsbc, and was fully integrated into ffmpeg.
> 
> Very rough numbers are useful in the commit message.

Here you go.

From 2227f8d8302fe37da00f34efacdf6b4941220330 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs 
Date: Sun, 17 Dec 2017 20:07:33 +0100
Subject: [PATCH 6/8] sbcenc: add MMX optimizations

This was originally based on libsbc, and was fully integrated into ffmpeg.

Rough speed test:
C version:speed= 592x
MMX version:  speed= 785x
---
 libavcodec/sbcdsp.c  |   3 +
 libavcodec/sbcdsp.h  |   2 +
 libavcodec/x86/Makefile  |   2 +
 libavcodec/x86/sbcdsp.asm| 284 +++
 libavcodec/x86/sbcdsp_init.c |  51 
 5 files changed, 342 insertions(+)
 create mode 100644 libavcodec/x86/sbcdsp.asm
 create mode 100644 libavcodec/x86/sbcdsp_init.c

diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
index 16faf5ba9b..9bb60cdd5e 100644
--- a/libavcodec/sbcdsp.c
+++ b/libavcodec/sbcdsp.c
@@ -387,4 +387,7 @@ av_cold void ff_sbcdsp_init(SBCDSPContext *s)
 /* Default implementation for scale factors calculation */
 s->sbc_calc_scalefactors = sbc_calc_scalefactors;
 s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
+
+if (ARCH_X86)
+ff_sbcdsp_init_x86(s);
 }
diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
index 66ed7d324e..127e6a8a11 100644
--- a/libavcodec/sbcdsp.h
+++ b/libavcodec/sbcdsp.h
@@ -80,4 +80,6 @@ struct sbc_dsp_context {
  */
 void ff_sbcdsp_init(SBCDSPContext *s);
 
+void ff_sbcdsp_init_x86(SBCDSPContext *s);
+
 #endif /* AVCODEC_SBCDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a805cd37b4..2350c8bbee 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
 OBJS-$(CONFIG_PRORES_DECODER)  += x86/proresdsp_init.o
 OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV40_DECODER)+= x86/rv40dsp_init.o
+OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o
 OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc_init.o
 OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)  += x86/mlpdsp_init.o
@@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER)  += x86/pngdsp.o
 X86ASM-OBJS-$(CONFIG_PRORES_DECODER)   += x86/proresdsp.o
 X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
+X86ASM-OBJS-$(CONFIG_SBC_ENCODER)  += x86/sbcdsp.o
 X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
 X86ASM-OBJS-$(CONFIG_TAK_DECODER)  += x86/takdsp.o
 X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER)   += x86/mlpdsp.o
diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
new file mode 100644
index 00..00b48a821b
--- /dev/null
+++ b/libavcodec/x86/sbcdsp.asm
@@ -0,0 +1,284 @@
+;**
+;* SIMD optimized SBC encoder DSP functions
+;*
+;* Copyright (C) 2017  Aurelien Jacobs 
+;* Copyright (C) 2008-2010  Nokia Corporation
+;* Copyright (C) 2004-2010  Marcel Holtmann 
+;* Copyright (C) 2004-2005  Henryk Ploetz 
+;* Copyright (C) 2005-2006  Brad Midgley 
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;**
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+scale_mask: times 2 dd 0x8000; 1 << (SBC_PROTO_FIXED_SCALE - 1)
+
+SECTION .text
+
+;***
+;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
+;***
+INIT_MMX mmx
+cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
+movq  m0, [inq]
+movq  m1, [inq+8]
+pmaddwd   m0, [constsq]
+pmaddwd   m1, [constsq+8]
+paddd m0, [scale_mask]
+paddd m1, 

Re: [FFmpeg-devel] [PATCH 6/8] sbcenc: add MMX optimizations

2017-12-17 Thread Carl Eugen Hoyos
2017-12-17 22:47 GMT+01:00 Aurelien Jacobs :
> This was originally based on libsbc, and was fully integrated into ffmpeg.

Very rough numbers are useful in the commit message.

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel