[libav-devel] [PATCH 2/6] truehd: break out part of rematrix_channels into platform-specific callback.
Verified with profiling that this doesn't have a measurable effect upon overall performance. --- libavcodec/mlpdec.c | 37 - libavcodec/mlpdsp.c | 33 + libavcodec/mlpdsp.h | 23 +++ 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c index ed5a6ac..c0f2d6a 100644 --- a/libavcodec/mlpdec.c +++ b/libavcodec/mlpdec.c @@ -978,7 +978,7 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr) static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) { SubStream *s = m-substream[substr]; -unsigned int mat, src_ch, i; +unsigned int mat; unsigned int maxchan; maxchan = s-max_matrix_channel; @@ -990,31 +990,18 @@ static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) } for (mat = 0; mat s-num_primitive_matrices; mat++) { -int matrix_noise_shift = s-matrix_noise_shift[mat]; unsigned int dest_ch = s-matrix_out_ch[mat]; -int32_t mask = MSB_MASK(s-quant_step_size[dest_ch]); -int32_t *coeffs = s-matrix_coeff[mat]; -int index = s-num_primitive_matrices - mat; -int index2 = 2 * index + 1; - -/* TODO: DSPContext? 
*/ - -for (i = 0; i s-blockpos; i++) { -int32_t bypassed_lsb = m-bypassed_lsbs[i][mat]; -int32_t *samples = m-sample_buffer[i]; -int64_t accum = 0; - -for (src_ch = 0; src_ch = maxchan; src_ch++) -accum += (int64_t) samples[src_ch] * coeffs[src_ch]; - -if (matrix_noise_shift) { -index = m-access_unit_size_pow2 - 1; -accum += m-noise_buffer[index] (matrix_noise_shift + 7); -index += index2; -} - -samples[dest_ch] = ((accum 14) mask) + bypassed_lsb; -} +m-dsp.mlp_rematrix_channel(m-sample_buffer[0][0], +s-matrix_coeff[mat], +m-bypassed_lsbs[0][mat], +m-noise_buffer, +s-num_primitive_matrices - mat, +dest_ch, +s-blockpos, +maxchan, +s-matrix_noise_shift[mat], +m-access_unit_size_pow2, +MSB_MASK(s-quant_step_size[dest_ch])); } } diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c index 151cf83..dfa13af 100644 --- a/libavcodec/mlpdsp.c +++ b/libavcodec/mlpdsp.c @@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff, } } +void ff_mlp_rematrix_channel(int32_t *samples, + const int32_t *coeffs, + const uint8_t *bypassed_lsbs, + const int8_t *noise_buffer, + int index, + unsigned int dest_ch, + uint16_t blockpos, + unsigned int maxchan, + int matrix_noise_shift, + int access_unit_size_pow2, + int32_t mask) +{ +unsigned int src_ch, i; +int index2 = 2 * index + 1; +for (i = 0; i blockpos; i++) { +int64_t accum = 0; + +for (src_ch = 0; src_ch = maxchan; src_ch++) +accum += (int64_t) samples[src_ch] * coeffs[src_ch]; + +if (matrix_noise_shift) { +index = access_unit_size_pow2 - 1; +accum += noise_buffer[index] (matrix_noise_shift + 7); +index += index2; +} + +samples[dest_ch] = ((accum 14) mask) + *bypassed_lsbs; +bypassed_lsbs += MAX_CHANNELS; +samples += MAX_CHANNELS; +} +} + av_cold void ff_mlpdsp_init(MLPDSPContext *c) { c-mlp_filter_channel = mlp_filter_channel; +c-mlp_rematrix_channel = ff_mlp_rematrix_channel; if (ARCH_ARM) ff_mlpdsp_init_arm(c); if (ARCH_X86) diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h index c985a17..bd864d9 
100644 --- a/libavcodec/mlpdsp.h +++ b/libavcodec/mlpdsp.h @@ -24,11 +24,34 @@ #include stdint.h +void ff_mlp_rematrix_channel(int32_t *samples, + const int32_t *coeffs, + const uint8_t *bypassed_lsbs, + const int8_t *noise_buffer, + int index, + unsigned int dest_ch, + uint16_t blockpos, + unsigned int maxchan, + int matrix_noise_shift, + int access_unit_size_pow2, + int32_t mask); + typedef struct
[libav-devel] [PATCH 2/6] truehd: break out part of rematrix_channels into platform-specific callback.
Verified with profiling that this doesn't have a measurable effect upon overall performance. --- libavcodec/mlpdec.c | 37 - libavcodec/mlpdsp.c | 33 + libavcodec/mlpdsp.h | 23 +++ 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c index ed5a6ac..c0f2d6a 100644 --- a/libavcodec/mlpdec.c +++ b/libavcodec/mlpdec.c @@ -978,7 +978,7 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr) static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) { SubStream *s = m-substream[substr]; -unsigned int mat, src_ch, i; +unsigned int mat; unsigned int maxchan; maxchan = s-max_matrix_channel; @@ -990,31 +990,18 @@ static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) } for (mat = 0; mat s-num_primitive_matrices; mat++) { -int matrix_noise_shift = s-matrix_noise_shift[mat]; unsigned int dest_ch = s-matrix_out_ch[mat]; -int32_t mask = MSB_MASK(s-quant_step_size[dest_ch]); -int32_t *coeffs = s-matrix_coeff[mat]; -int index = s-num_primitive_matrices - mat; -int index2 = 2 * index + 1; - -/* TODO: DSPContext? 
*/ - -for (i = 0; i s-blockpos; i++) { -int32_t bypassed_lsb = m-bypassed_lsbs[i][mat]; -int32_t *samples = m-sample_buffer[i]; -int64_t accum = 0; - -for (src_ch = 0; src_ch = maxchan; src_ch++) -accum += (int64_t) samples[src_ch] * coeffs[src_ch]; - -if (matrix_noise_shift) { -index = m-access_unit_size_pow2 - 1; -accum += m-noise_buffer[index] (matrix_noise_shift + 7); -index += index2; -} - -samples[dest_ch] = ((accum 14) mask) + bypassed_lsb; -} +m-dsp.mlp_rematrix_channel(m-sample_buffer[0][0], +s-matrix_coeff[mat], +m-bypassed_lsbs[0][mat], +m-noise_buffer, +s-num_primitive_matrices - mat, +dest_ch, +s-blockpos, +maxchan, +s-matrix_noise_shift[mat], +m-access_unit_size_pow2, +MSB_MASK(s-quant_step_size[dest_ch])); } } diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c index 151cf83..dfa13af 100644 --- a/libavcodec/mlpdsp.c +++ b/libavcodec/mlpdsp.c @@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff, } } +void ff_mlp_rematrix_channel(int32_t *samples, + const int32_t *coeffs, + const uint8_t *bypassed_lsbs, + const int8_t *noise_buffer, + int index, + unsigned int dest_ch, + uint16_t blockpos, + unsigned int maxchan, + int matrix_noise_shift, + int access_unit_size_pow2, + int32_t mask) +{ +unsigned int src_ch, i; +int index2 = 2 * index + 1; +for (i = 0; i blockpos; i++) { +int64_t accum = 0; + +for (src_ch = 0; src_ch = maxchan; src_ch++) +accum += (int64_t) samples[src_ch] * coeffs[src_ch]; + +if (matrix_noise_shift) { +index = access_unit_size_pow2 - 1; +accum += noise_buffer[index] (matrix_noise_shift + 7); +index += index2; +} + +samples[dest_ch] = ((accum 14) mask) + *bypassed_lsbs; +bypassed_lsbs += MAX_CHANNELS; +samples += MAX_CHANNELS; +} +} + av_cold void ff_mlpdsp_init(MLPDSPContext *c) { c-mlp_filter_channel = mlp_filter_channel; +c-mlp_rematrix_channel = ff_mlp_rematrix_channel; if (ARCH_ARM) ff_mlpdsp_init_arm(c); if (ARCH_X86) diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h index c985a17..bd864d9 
100644 --- a/libavcodec/mlpdsp.h +++ b/libavcodec/mlpdsp.h @@ -24,11 +24,34 @@ #include stdint.h +void ff_mlp_rematrix_channel(int32_t *samples, + const int32_t *coeffs, + const uint8_t *bypassed_lsbs, + const int8_t *noise_buffer, + int index, + unsigned int dest_ch, + uint16_t blockpos, + unsigned int maxchan, + int matrix_noise_shift, + int access_unit_size_pow2, + int32_t mask); + typedef struct
Re: [libav-devel] [PATCH 2/6] truehd: break out part of rematrix_channels into platform-specific callback.
On 19/03/14 18:24, Ben Avison wrote: Verified with profiling that this doesn't have a measurable effect upon overall performance. --- libavcodec/mlpdec.c | 37 - libavcodec/mlpdsp.c | 33 + libavcodec/mlpdsp.h | 23 +++ 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c index ed5a6ac..c0f2d6a 100644 --- a/libavcodec/mlpdec.c +++ b/libavcodec/mlpdec.c @@ -978,7 +978,7 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr) static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) { SubStream *s = m-substream[substr]; -unsigned int mat, src_ch, i; +unsigned int mat; unsigned int maxchan; maxchan = s-max_matrix_channel; @@ -990,31 +990,18 @@ static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) } for (mat = 0; mat s-num_primitive_matrices; mat++) { -int matrix_noise_shift = s-matrix_noise_shift[mat]; unsigned int dest_ch = s-matrix_out_ch[mat]; -int32_t mask = MSB_MASK(s-quant_step_size[dest_ch]); -int32_t *coeffs = s-matrix_coeff[mat]; -int index = s-num_primitive_matrices - mat; -int index2 = 2 * index + 1; - -/* TODO: DSPContext? 
*/ - -for (i = 0; i s-blockpos; i++) { -int32_t bypassed_lsb = m-bypassed_lsbs[i][mat]; -int32_t *samples = m-sample_buffer[i]; -int64_t accum = 0; - -for (src_ch = 0; src_ch = maxchan; src_ch++) -accum += (int64_t) samples[src_ch] * coeffs[src_ch]; - -if (matrix_noise_shift) { -index = m-access_unit_size_pow2 - 1; -accum += m-noise_buffer[index] (matrix_noise_shift + 7); -index += index2; -} - -samples[dest_ch] = ((accum 14) mask) + bypassed_lsb; -} +m-dsp.mlp_rematrix_channel(m-sample_buffer[0][0], +s-matrix_coeff[mat], +m-bypassed_lsbs[0][mat], +m-noise_buffer, +s-num_primitive_matrices - mat, +dest_ch, +s-blockpos, +maxchan, +s-matrix_noise_shift[mat], +m-access_unit_size_pow2, +MSB_MASK(s-quant_step_size[dest_ch])); } } diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c index 151cf83..dfa13af 100644 --- a/libavcodec/mlpdsp.c +++ b/libavcodec/mlpdsp.c @@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff, } } +void ff_mlp_rematrix_channel(int32_t *samples, + const int32_t *coeffs, + const uint8_t *bypassed_lsbs, + const int8_t *noise_buffer, + int index, + unsigned int dest_ch, + uint16_t blockpos, + unsigned int maxchan, + int matrix_noise_shift, + int access_unit_size_pow2, + int32_t mask) +{ +unsigned int src_ch, i; +int index2 = 2 * index + 1; +for (i = 0; i blockpos; i++) { +int64_t accum = 0; + +for (src_ch = 0; src_ch = maxchan; src_ch++) +accum += (int64_t) samples[src_ch] * coeffs[src_ch]; + +if (matrix_noise_shift) { +index = access_unit_size_pow2 - 1; +accum += noise_buffer[index] (matrix_noise_shift + 7); +index += index2; +} + +samples[dest_ch] = ((accum 14) mask) + *bypassed_lsbs; +bypassed_lsbs += MAX_CHANNELS; +samples += MAX_CHANNELS; +} +} + av_cold void ff_mlpdsp_init(MLPDSPContext *c) { c-mlp_filter_channel = mlp_filter_channel; +c-mlp_rematrix_channel = ff_mlp_rematrix_channel; if (ARCH_ARM) ff_mlpdsp_init_arm(c); if (ARCH_X86) diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h index c985a17..bd864d9 
100644 --- a/libavcodec/mlpdsp.h +++ b/libavcodec/mlpdsp.h @@ -24,11 +24,34 @@ #include stdint.h +void ff_mlp_rematrix_channel(int32_t *samples, + const int32_t *coeffs, + const uint8_t *bypassed_lsbs, + const int8_t *noise_buffer, + int index, + unsigned int dest_ch, + uint16_t blockpos, + unsigned int maxchan, +
Re: [libav-devel] [PATCH 2/6] truehd: break out part of rematrix_channels into platform-specific callback.
On Wed, 19 Mar 2014, Luca Barbato wrote:
> On 19/03/14 18:24, Ben Avison wrote:
> > Verified with profiling that this doesn't have a measurable effect upon overall performance.
> > ---
> > libavcodec/mlpdec.c | 37 -
> > libavcodec/mlpdsp.c | 33 +
> > libavcodec/mlpdsp.h | 23 +++
> > 3 files changed, 68 insertions(+), 25 deletions(-)
> >
> > diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
> > index c985a17..bd864d9 100644
> > --- a/libavcodec/mlpdsp.h
> > +++ b/libavcodec/mlpdsp.h
> > @@ -24,11 +24,34 @@
> >  #include <stdint.h>
> >
> > +void ff_mlp_rematrix_channel(int32_t *samples,
> > +                             const int32_t *coeffs,
> > +                             const uint8_t *bypassed_lsbs,
> > +                             const int8_t *noise_buffer,
> > +                             int index,
> > +                             unsigned int dest_ch,
> > +                             uint16_t blockpos,
> > +                             unsigned int maxchan,
> > +                             int matrix_noise_shift,
> > +                             int access_unit_size_pow2,
> > +                             int32_t mask);
> > +
> >  typedef struct MLPDSPContext {
> >      void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
> >                                 int firorder, int iirorder,
> >                                 unsigned int filter_shift, int32_t mask,
> >                                 int blocksize, int32_t *sample_buffer);
> > +    void (*mlp_rematrix_channel)(int32_t *samples,
> > +                                 const int32_t *coeffs,
> > +                                 const uint8_t *bypassed_lsbs,
> > +                                 const int8_t *noise_buffer,
> > +                                 int index,
> > +                                 unsigned int dest_ch,
> > +                                 uint16_t blockpos,
> > +                                 unsigned int maxchan,
> > +                                 int matrix_noise_shift,
> > +                                 int access_unit_size_pow2,
> > +                                 int32_t mask);
>
> Could you please try to have the function accept the m and s context instead of having all those parameters? Does it work equally well?

That requires hand-coding (and manually updating) the struct element offsets (see libavcodec/arm/asm-offsets.h and mpegvideo_arm.c) and IIRC we generally tend to move away from it.

// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/6] truehd: break out part of rematrix_channels into platform-specific callback.
On Wed, Mar 19, 2014 at 05:24:23PM +0000, Ben Avison wrote:
> --- a/libavcodec/mlpdec.c
> +++ b/libavcodec/mlpdec.c
> --- a/libavcodec/mlpdsp.c
> +++ b/libavcodec/mlpdsp.c
> @@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff,
> +void ff_mlp_rematrix_channel(int32_t *samples,

This is not used outside of the file, so it should be a static function and not have a ff_ prefix.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/6] truehd: break out part of rematrix_channels into platform-specific callback.
On 19/03/14 18:56, Martin Storsjö wrote:
> On Wed, 19 Mar 2014, Luca Barbato wrote:
> > On 19/03/14 18:24, Ben Avison wrote:
> > > Verified with profiling that this doesn't have a measurable effect upon overall performance.
> > > ---
> > > libavcodec/mlpdec.c | 37 -
> > > libavcodec/mlpdsp.c | 33 +
> > > libavcodec/mlpdsp.h | 23 +++
> > > 3 files changed, 68 insertions(+), 25 deletions(-)
> > >
> > > diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
> > > index c985a17..bd864d9 100644
> > > --- a/libavcodec/mlpdsp.h
> > > +++ b/libavcodec/mlpdsp.h
> > > @@ -24,11 +24,34 @@
> > >  #include <stdint.h>
> > >
> > > +void ff_mlp_rematrix_channel(int32_t *samples,
> > > +                             const int32_t *coeffs,
> > > +                             const uint8_t *bypassed_lsbs,
> > > +                             const int8_t *noise_buffer,
> > > +                             int index,
> > > +                             unsigned int dest_ch,
> > > +                             uint16_t blockpos,
> > > +                             unsigned int maxchan,
> > > +                             int matrix_noise_shift,
> > > +                             int access_unit_size_pow2,
> > > +                             int32_t mask);
> > > +
> > >  typedef struct MLPDSPContext {
> > >      void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
> > >                                 int firorder, int iirorder,
> > >                                 unsigned int filter_shift, int32_t mask,
> > >                                 int blocksize, int32_t *sample_buffer);
> > > +    void (*mlp_rematrix_channel)(int32_t *samples,
> > > +                                 const int32_t *coeffs,
> > > +                                 const uint8_t *bypassed_lsbs,
> > > +                                 const int8_t *noise_buffer,
> > > +                                 int index,
> > > +                                 unsigned int dest_ch,
> > > +                                 uint16_t blockpos,
> > > +                                 unsigned int maxchan,
> > > +                                 int matrix_noise_shift,
> > > +                                 int access_unit_size_pow2,
> > > +                                 int32_t mask);
> >
> > Could you please try to have the function accept the m and s context instead of having all those parameters? Does it work equally well?
>
> That requires hand-coding (and manually updating) the struct element offsets (see libavcodec/arm/asm-offsets.h and mpegvideo_arm.c) and IIRC we generally tend to move away from it.

Fine as it is then.

lu
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/6] truehd: break out part of rematrix_channels into platform-specific callback.
[Belatedly changing out of digest mode - hope this doesn't screw up people's threading too much...]

> > --- a/libavcodec/mlpdec.c
> > +++ b/libavcodec/mlpdec.c
> > --- a/libavcodec/mlpdsp.c
> > +++ b/libavcodec/mlpdsp.c
> > @@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff,
> > +void ff_mlp_rematrix_channel(int32_t *samples,
>
> This is not used outside of the file, so it should be a static function and not have a ff_ prefix.

It is called from outside the file: from arm/mlpdsp_arm.S line 663. It's used as a fallback for rare cases that aren't handled by the assembly.

Ben
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/6] truehd: break out part of rematrix_channels into platform-specific callback.
Verified with profiling that this doesn't have a measurable effect upon overall performance. --- libavcodec/mlpdec.c | 37 - libavcodec/mlpdsp.c | 33 + libavcodec/mlpdsp.h | 23 +++ 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c index ed5a6ac..c0f2d6a 100644 --- a/libavcodec/mlpdec.c +++ b/libavcodec/mlpdec.c @@ -978,7 +978,7 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr) static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) { SubStream *s = m-substream[substr]; -unsigned int mat, src_ch, i; +unsigned int mat; unsigned int maxchan; maxchan = s-max_matrix_channel; @@ -990,31 +990,18 @@ static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) } for (mat = 0; mat s-num_primitive_matrices; mat++) { -int matrix_noise_shift = s-matrix_noise_shift[mat]; unsigned int dest_ch = s-matrix_out_ch[mat]; -int32_t mask = MSB_MASK(s-quant_step_size[dest_ch]); -int32_t *coeffs = s-matrix_coeff[mat]; -int index = s-num_primitive_matrices - mat; -int index2 = 2 * index + 1; - -/* TODO: DSPContext? 
*/ - -for (i = 0; i s-blockpos; i++) { -int32_t bypassed_lsb = m-bypassed_lsbs[i][mat]; -int32_t *samples = m-sample_buffer[i]; -int64_t accum = 0; - -for (src_ch = 0; src_ch = maxchan; src_ch++) -accum += (int64_t) samples[src_ch] * coeffs[src_ch]; - -if (matrix_noise_shift) { -index = m-access_unit_size_pow2 - 1; -accum += m-noise_buffer[index] (matrix_noise_shift + 7); -index += index2; -} - -samples[dest_ch] = ((accum 14) mask) + bypassed_lsb; -} +m-dsp.mlp_rematrix_channel(m-sample_buffer[0][0], +s-matrix_coeff[mat], +m-bypassed_lsbs[0][mat], +m-noise_buffer, +s-num_primitive_matrices - mat, +dest_ch, +s-blockpos, +maxchan, +s-matrix_noise_shift[mat], +m-access_unit_size_pow2, +MSB_MASK(s-quant_step_size[dest_ch])); } } diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c index 151cf83..dfa13af 100644 --- a/libavcodec/mlpdsp.c +++ b/libavcodec/mlpdsp.c @@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff, } } +void ff_mlp_rematrix_channel(int32_t *samples, + const int32_t *coeffs, + const uint8_t *bypassed_lsbs, + const int8_t *noise_buffer, + int index, + unsigned int dest_ch, + uint16_t blockpos, + unsigned int maxchan, + int matrix_noise_shift, + int access_unit_size_pow2, + int32_t mask) +{ +unsigned int src_ch, i; +int index2 = 2 * index + 1; +for (i = 0; i blockpos; i++) { +int64_t accum = 0; + +for (src_ch = 0; src_ch = maxchan; src_ch++) +accum += (int64_t) samples[src_ch] * coeffs[src_ch]; + +if (matrix_noise_shift) { +index = access_unit_size_pow2 - 1; +accum += noise_buffer[index] (matrix_noise_shift + 7); +index += index2; +} + +samples[dest_ch] = ((accum 14) mask) + *bypassed_lsbs; +bypassed_lsbs += MAX_CHANNELS; +samples += MAX_CHANNELS; +} +} + av_cold void ff_mlpdsp_init(MLPDSPContext *c) { c-mlp_filter_channel = mlp_filter_channel; +c-mlp_rematrix_channel = ff_mlp_rematrix_channel; if (ARCH_ARM) ff_mlpdsp_init_arm(c); if (ARCH_X86) diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h index c985a17..bd864d9 
100644 --- a/libavcodec/mlpdsp.h +++ b/libavcodec/mlpdsp.h @@ -24,11 +24,34 @@ #include stdint.h +void ff_mlp_rematrix_channel(int32_t *samples, + const int32_t *coeffs, + const uint8_t *bypassed_lsbs, + const int8_t *noise_buffer, + int index, + unsigned int dest_ch, + uint16_t blockpos, + unsigned int maxchan, + int matrix_noise_shift, + int access_unit_size_pow2, + int32_t mask); + typedef struct