[FFmpeg-devel] [PATCH] x86/flacdsp: add SSE2 and AVX decorrelate functions

2014-11-03 Thread James Almer
Two to four times faster depending on instruction set, block size and channel 
count.

Signed-off-by: James Almer <jamr...@gmail.com>
---
Now also with 16 bits indep4 and indep6.

 libavcodec/arm/flacdsp_init_arm.c |   2 +-
 libavcodec/flacdec.c  |   6 +-
 libavcodec/flacdsp.c  |   6 +-
 libavcodec/flacdsp.h  |   6 +-
 libavcodec/flacenc.c  |   2 +-
 libavcodec/x86/flacdsp.asm| 261 ++
 libavcodec/x86/flacdsp_init.c |  52 +++-
 7 files changed, 323 insertions(+), 12 deletions(-)

diff --git a/libavcodec/arm/flacdsp_init_arm.c 
b/libavcodec/arm/flacdsp_init_arm.c
index 9ddb268..df1b19c 100644
--- a/libavcodec/arm/flacdsp_init_arm.c
+++ b/libavcodec/arm/flacdsp_init_arm.c
@@ -24,7 +24,7 @@
 void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order,
 int qlevel, int len);
 
-av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt,
+av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
  int bps)
 {
 if (bps <= 16 && CONFIG_FLAC_DECODER)
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 1333972..01ae717 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -111,7 +111,7 @@ static av_cold int flac_decode_init(AVCodecContext *avctx)
 if (ret < 0)
 return ret;
 flac_set_bps(s);
-ff_flacdsp_init(&s->dsp, avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 
 return 0;
@@ -173,7 +173,7 @@ static int parse_streaminfo(FLACContext *s, const uint8_t *buf, int buf_size)
 if (ret < 0)
 return ret;
 flac_set_bps(s);
-ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 
 return 0;
@@ -472,7 +472,7 @@ static int decode_frame(FLACContext *s)
 ret = allocate_buffers(s);
 if (ret < 0)
 return ret;
-ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 dump_headers(s->avctx, (FLACStreaminfo *)s);
 }
diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index b15bc74..a83eb83 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -85,7 +85,7 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
 
 }
 
-av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt,
+av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
  int bps)
 {
 if (bps > 16) {
@@ -127,7 +127,7 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum 
AVSampleFormat fmt,
 }
 
 if (ARCH_ARM)
-ff_flacdsp_init_arm(c, fmt, bps);
+ff_flacdsp_init_arm(c, fmt, channels, bps);
 if (ARCH_X86)
-ff_flacdsp_init_x86(c, fmt, bps);
+ff_flacdsp_init_x86(c, fmt, channels, bps);
 }
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 14f3466..417381c 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -31,8 +31,8 @@ typedef struct FLACDSPContext {
const int32_t coefs[32], int shift);
 } FLACDSPContext;
 
-void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
-void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
-void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
+void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, 
int bps);
+void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int 
channels, int bps);
+void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int 
channels, int bps);
 
 #endif /* AVCODEC_FLACDSP_H */
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 3b72888..e66ef3d 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -428,7 +428,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
   s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON);
 
 ff_bswapdsp_init(&s->bdsp);
-ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt,
+ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt, channels,
 avctx->bits_per_raw_sample);
 
 dprint_compression_options(s);
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 37ee87b..afbe199 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -72,3 +72,264 @@ ALIGN 16
 LPC_32 xop
 %endif
 LPC_32 sse4
+
+;-
+;void ff_flac_decorrelate_{ls,rs,ms,indep2}_16_sse2(uint8_t **out, int32_t 
**in, int channels,
+;   int len, int shift);

[FFmpeg-devel] [PATCH] x86/flacdsp: add SSE2 and AVX decorrelate functions

2014-11-02 Thread James Almer
Two to four times faster depending on instruction set, block size and channel 
count.

Signed-off-by: James Almer <jamr...@gmail.com>
---
TODO: 16 bits indep for 4, 6 and 8 channels. 24/32 bits indep for 8 channels.
  AVX2 and maybe MMX versions.
  Planar?

 libavcodec/arm/flacdsp_init_arm.c |   2 +-
 libavcodec/flacdec.c  |   6 +-
 libavcodec/flacdsp.c  |   6 +-
 libavcodec/flacdsp.h  |   6 +-
 libavcodec/flacenc.c  |   2 +-
 libavcodec/x86/flacdsp.asm| 206 ++
 libavcodec/x86/flacdsp_init.c |  48 -
 7 files changed, 264 insertions(+), 12 deletions(-)

diff --git a/libavcodec/arm/flacdsp_init_arm.c 
b/libavcodec/arm/flacdsp_init_arm.c
index 9ddb268..df1b19c 100644
--- a/libavcodec/arm/flacdsp_init_arm.c
+++ b/libavcodec/arm/flacdsp_init_arm.c
@@ -24,7 +24,7 @@
 void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order,
 int qlevel, int len);
 
-av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt,
+av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
  int bps)
 {
 if (bps <= 16 && CONFIG_FLAC_DECODER)
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 1333972..01ae717 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -111,7 +111,7 @@ static av_cold int flac_decode_init(AVCodecContext *avctx)
 if (ret < 0)
 return ret;
 flac_set_bps(s);
-ff_flacdsp_init(&s->dsp, avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 
 return 0;
@@ -173,7 +173,7 @@ static int parse_streaminfo(FLACContext *s, const uint8_t *buf, int buf_size)
 if (ret < 0)
 return ret;
 flac_set_bps(s);
-ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 
 return 0;
@@ -472,7 +472,7 @@ static int decode_frame(FLACContext *s)
 ret = allocate_buffers(s);
 if (ret < 0)
 return ret;
-ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 dump_headers(s->avctx, (FLACStreaminfo *)s);
 }
diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index b15bc74..a83eb83 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -85,7 +85,7 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
 
 }
 
-av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt,
+av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
  int bps)
 {
 if (bps > 16) {
@@ -127,7 +127,7 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum 
AVSampleFormat fmt,
 }
 
 if (ARCH_ARM)
-ff_flacdsp_init_arm(c, fmt, bps);
+ff_flacdsp_init_arm(c, fmt, channels, bps);
 if (ARCH_X86)
-ff_flacdsp_init_x86(c, fmt, bps);
+ff_flacdsp_init_x86(c, fmt, channels, bps);
 }
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 14f3466..417381c 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -31,8 +31,8 @@ typedef struct FLACDSPContext {
const int32_t coefs[32], int shift);
 } FLACDSPContext;
 
-void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
-void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
-void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
+void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, 
int bps);
+void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int 
channels, int bps);
+void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int 
channels, int bps);
 
 #endif /* AVCODEC_FLACDSP_H */
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 3b72888..e66ef3d 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -428,7 +428,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
   s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON);
 
 ff_bswapdsp_init(&s->bdsp);
-ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt,
+ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt, channels,
 avctx->bits_per_raw_sample);
 
 dprint_compression_options(s);
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 37ee87b..35f9b81 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -72,3 +72,209 @@ ALIGN 16
 LPC_32 xop
 %endif
 LPC_32 sse4
+
+;-
+;void ff_flac_decorrelate_{ls,rs,ms,indep2}_16_sse2(uint8_t **out, int32_t 
**in, int channels,
+;

Re: [FFmpeg-devel] [PATCH] x86/flacdsp: add SSE2 and AVX decorrelate functions

2014-11-02 Thread Clément Bœsch
On Sun, Nov 02, 2014 at 07:31:48PM -0300, James Almer wrote:
 Two to four times faster depending on instruction set, block size and channel 
 count.
 
 Signed-off-by: James Almer jamr...@gmail.com
 ---
 TODO: 16 bits indep for 4, 6 and 8 channels. 24/32 bits indep for 8 channels.
   AVX2 and maybe MMX versions.
   Planar?
 
  libavcodec/arm/flacdsp_init_arm.c |   2 +-
  libavcodec/flacdec.c  |   6 +-
  libavcodec/flacdsp.c  |   6 +-
  libavcodec/flacdsp.h  |   6 +-
  libavcodec/flacenc.c  |   2 +-
  libavcodec/x86/flacdsp.asm| 206 
 ++
  libavcodec/x86/flacdsp_init.c |  48 -
  7 files changed, 264 insertions(+), 12 deletions(-)
[...]
 +mova   m0, [in0q]
 +mova   m1, [in0q+in1q]
 +%if %1 > 2
 +mova   m2, [in0q+in2q]
 +mova   m3, [in0q+in3q]
 +%if %1 > 4
 +mova   m4, [in0q+in4q]
 +mova   m5, [in0q+in5q]
 +%endif
 +%endif
 +pslld  m0, m%2
 +pslld  m1, m%2
 +%if %1 > 2
 +pslld  m2, m%2
 +pslld  m3, m%2
 +%if %1 > 4
 +pslld  m4, m%2
 +pslld  m5, m%2
 +%endif
 +%endif

Can't you do something like this? (untested)
pslld  m0, [in0q], m%2
%assign i 0
%rep %1
pslld  m%i, [in0q+in%iq], m%2
%assign i i+1
%endrep

[...]

-- 
Clément B.


pgphQeDWNz8un.pgp
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] x86/flacdsp: add SSE2 and AVX decorrelate functions

2014-11-02 Thread James Almer
On 02/11/14 7:43 PM, Clément Bœsch wrote:
 On Sun, Nov 02, 2014 at 07:31:48PM -0300, James Almer wrote:
 Two to four times faster depending on instruction set, block size and 
 channel count.

 Signed-off-by: James Almer jamr...@gmail.com
 ---
 TODO: 16 bits indep for 4, 6 and 8 channels. 24/32 bits indep for 8 channels.
   AVX2 and maybe MMX versions.
   Planar?

  libavcodec/arm/flacdsp_init_arm.c |   2 +-
  libavcodec/flacdec.c  |   6 +-
  libavcodec/flacdsp.c  |   6 +-
  libavcodec/flacdsp.h  |   6 +-
  libavcodec/flacenc.c  |   2 +-
  libavcodec/x86/flacdsp.asm| 206 
 ++
  libavcodec/x86/flacdsp_init.c |  48 -
  7 files changed, 264 insertions(+), 12 deletions(-)
 [...]
 +mova   m0, [in0q]
 +mova   m1, [in0q+in1q]
 +%if %1 > 2
 +mova   m2, [in0q+in2q]
 +mova   m3, [in0q+in3q]
 +%if %1 > 4
 +mova   m4, [in0q+in4q]
 +mova   m5, [in0q+in5q]
 +%endif
 +%endif
 +pslld  m0, m%2
 +pslld  m1, m%2
 +%if %1 > 2
 +pslld  m2, m%2
 +pslld  m3, m%2
 +%if %1 > 4
 +pslld  m4, m%2
 +pslld  m5, m%2
 +%endif
 +%endif
 
 Can't you do something like this? (untested)
 pslld  m0, [in0q], m%2
 %assign i 0
 %rep %1
 pslld  m%i, [in0q+in%iq], m%2
 %assign i i+1
 %endrep

YASM    libavcodec/x86/flacdsp.o
D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error: undefined symbol `m' (first use)
D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error: undefined symbol `i' (first use)
D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error: undefined symbol `in' (first use)
D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error: undefined symbol `iq' (first use)
D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error:  (Each undefined symbol is reported only once.)
make: *** [libavcodec/x86/flacdsp.o] Error 1

A %rep like that is only four lines shorter. Do you consider it more readable 
than the alternative to justify trying 
to get it working?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] x86/flacdsp: add SSE2 and AVX decorrelate functions

2014-11-02 Thread Clément Bœsch
On Sun, Nov 02, 2014 at 07:55:35PM -0300, James Almer wrote:
 On 02/11/14 7:43 PM, Clément Bœsch wrote:
  On Sun, Nov 02, 2014 at 07:31:48PM -0300, James Almer wrote:
  Two to four times faster depending on instruction set, block size and 
  channel count.
 
  Signed-off-by: James Almer jamr...@gmail.com
  ---
  TODO: 16 bits indep for 4, 6 and 8 channels. 24/32 bits indep for 8 
  channels.
AVX2 and maybe MMX versions.
Planar?
 
   libavcodec/arm/flacdsp_init_arm.c |   2 +-
   libavcodec/flacdec.c  |   6 +-
   libavcodec/flacdsp.c  |   6 +-
   libavcodec/flacdsp.h  |   6 +-
   libavcodec/flacenc.c  |   2 +-
   libavcodec/x86/flacdsp.asm| 206 
  ++
   libavcodec/x86/flacdsp_init.c |  48 -
   7 files changed, 264 insertions(+), 12 deletions(-)
  [...]
  +mova   m0, [in0q]
  +mova   m1, [in0q+in1q]
  +%if %1 > 2
  +mova   m2, [in0q+in2q]
  +mova   m3, [in0q+in3q]
  +%if %1 > 4
  +mova   m4, [in0q+in4q]
  +mova   m5, [in0q+in5q]
  +%endif
  +%endif
  +pslld  m0, m%2
  +pslld  m1, m%2
  +%if %1 > 2
  +pslld  m2, m%2
  +pslld  m3, m%2
  +%if %1 > 4
  +pslld  m4, m%2
  +pslld  m5, m%2
  +%endif
  +%endif
  
  Can't you do something like this? (untested)
  pslld  m0, [in0q], m%2
  %assign i 0
  %rep %1
  pslld  m%i, [in0q+in%iq], m%2
  %assign i i+1
  %endrep
 
 YASMlibavcodec/x86/flacdsp.o
 D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error: undefined 
 symbol `m' (first use)
 D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error: undefined 
 symbol `i' (first use)
 D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error: undefined 
 symbol `in' (first use)
 D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error: undefined 
 symbol `iq' (first use)
 D:/MinGW/msys/1.0/ffmpeg/src/libavcodec/x86/flacdsp.asm:271: error:  (Each 
 undefined symbol is reported only once.)
 make: *** [libavcodec/x86/flacdsp.o] Error 1
 
 A %rep like that is only four lines shorter. Do you consider it more readable 
 than the alternative to justify trying 
 to get it working?

Totally up to you, it looked easier to maintain and more obvious than several
levels of nested ifdefery.

-- 
Clément B.


pgpzLgMCq7wMk.pgp
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] x86/flacdsp: add SSE2 and AVX decorrelate functions

2014-11-02 Thread James Almer
Two to four times faster depending on instruction set, block size and channel 
count.

Signed-off-by: James Almer <jamr...@gmail.com>
---
 libavcodec/arm/flacdsp_init_arm.c |   2 +-
 libavcodec/flacdec.c  |   6 +-
 libavcodec/flacdsp.c  |   6 +-
 libavcodec/flacdsp.h  |   6 +-
 libavcodec/flacenc.c  |   2 +-
 libavcodec/x86/flacdsp.asm| 193 ++
 libavcodec/x86/flacdsp_init.c |  48 +-
 7 files changed, 251 insertions(+), 12 deletions(-)

diff --git a/libavcodec/arm/flacdsp_init_arm.c 
b/libavcodec/arm/flacdsp_init_arm.c
index 9ddb268..df1b19c 100644
--- a/libavcodec/arm/flacdsp_init_arm.c
+++ b/libavcodec/arm/flacdsp_init_arm.c
@@ -24,7 +24,7 @@
 void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order,
 int qlevel, int len);
 
-av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt,
+av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
  int bps)
 {
 if (bps <= 16 && CONFIG_FLAC_DECODER)
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 1333972..01ae717 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -111,7 +111,7 @@ static av_cold int flac_decode_init(AVCodecContext *avctx)
 if (ret < 0)
 return ret;
 flac_set_bps(s);
-ff_flacdsp_init(&s->dsp, avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 
 return 0;
@@ -173,7 +173,7 @@ static int parse_streaminfo(FLACContext *s, const uint8_t *buf, int buf_size)
 if (ret < 0)
 return ret;
 flac_set_bps(s);
-ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 
 return 0;
@@ -472,7 +472,7 @@ static int decode_frame(FLACContext *s)
 ret = allocate_buffers(s);
 if (ret < 0)
 return ret;
-ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->bps);
+ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, s->channels, s->bps);
 s->got_streaminfo = 1;
 dump_headers(s->avctx, (FLACStreaminfo *)s);
 }
diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index b15bc74..a83eb83 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -85,7 +85,7 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
 
 }
 
-av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt,
+av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
  int bps)
 {
 if (bps > 16) {
@@ -127,7 +127,7 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum 
AVSampleFormat fmt,
 }
 
 if (ARCH_ARM)
-ff_flacdsp_init_arm(c, fmt, bps);
+ff_flacdsp_init_arm(c, fmt, channels, bps);
 if (ARCH_X86)
-ff_flacdsp_init_x86(c, fmt, bps);
+ff_flacdsp_init_x86(c, fmt, channels, bps);
 }
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 14f3466..417381c 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -31,8 +31,8 @@ typedef struct FLACDSPContext {
const int32_t coefs[32], int shift);
 } FLACDSPContext;
 
-void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
-void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
-void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
+void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, 
int bps);
+void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int 
channels, int bps);
+void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int 
channels, int bps);
 
 #endif /* AVCODEC_FLACDSP_H */
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 3b72888..e66ef3d 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -428,7 +428,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
   s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON);
 
 ff_bswapdsp_init(&s->bdsp);
-ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt,
+ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt, channels,
 avctx->bits_per_raw_sample);
 
 dprint_compression_options(s);
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 37ee87b..4091e16 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -72,3 +72,196 @@ ALIGN 16
 LPC_32 xop
 %endif
 LPC_32 sse4
+
+;-
+;void ff_flac_decorrelate_{ls,rs,ms,indep2}_16_sse2(uint8_t **out, int32_t 
**in, int channels,
+;   int len, int shift);