Signed-off-by: Mans Rullgard <m...@mansr.com> --- Slightly improved the NEON function by using a fixed-point-to-float conversion instruction instead of letting gcc mess up the division by 16.
Fixed indentation Diego complained about by using a temp variable to shorten the line. This is also more readable. --- libavcodec/arm/dca.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ libavcodec/dca.c | 27 +++++++++++++++++++-------- libavcodec/dcadata.h | 2 +- 3 files changed, 69 insertions(+), 9 deletions(-) create mode 100644 libavcodec/arm/dca.h diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h new file mode 100644 index 0000000..c4c024a --- /dev/null +++ b/libavcodec/arm/dca.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011 Mans Rullgard <m...@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_ARM_DCA_H +#define AVCODEC_ARM_DCA_H + +#include <stdint.h> +#include "config.h" + +#if HAVE_NEON && HAVE_INLINE_ASM + +#define int8x8_fmul_int32 int8x8_fmul_int32 +static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale) +{ + __asm__ ("vcvt.f32.s32 %2, %2, #4 \n" + "vld1.8 {d0}, [%1,:64] \n" + "vmovl.s8 q0, d0 \n" + "vmovl.s16 q1, d1 \n" + "vmovl.s16 q0, d0 \n" + "vcvt.f32.s32 q0, q0 \n" + "vcvt.f32.s32 q1, q1 \n" + "vmul.f32 q0, q0, %y2 \n" + "vmul.f32 q1, q1, %y2 \n" + "vst1.32 {q0-q1}, [%m0,:128] \n" + : "=Um"(*(float (*)[8])dst) + : "r"(src), "x"(scale) + : "d0", "d1", "d2", "d3"); +} + +#endif + +#endif /* AVCODEC_ARM_DCA_H */ diff --git a/libavcodec/dca.c b/libavcodec/dca.c index 735d7ba..e963fe0 100644 --- a/libavcodec/dca.c +++ b/libavcodec/dca.c @@ -42,6 +42,10 @@ #include "dcadsp.h" #include "fmtconvert.h" +#if ARCH_ARM +# include "arm/dca.h" +#endif + //#define TRACE #define DCA_PRIM_CHANNELS_MAX (7) @@ -320,7 +324,7 @@ typedef struct { int lfe_scale_factor; /* Subband samples history (for ADPCM) */ - float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4]; + DECLARE_ALIGNED(16, float, subband_samples_hist)[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4]; DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512]; DECLARE_ALIGNED(32, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32]; int hist_index[DCA_PRIM_CHANNELS_MAX]; @@ -1057,6 +1061,16 @@ static int decode_blockcode(int code, int levels, int *values) static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 }; static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 }; +#ifndef int8x8_fmul_int32 +static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale) +{ + float fscale = scale / 
16.0; + int i; + for (i = 0; i < 8; i++) + dst[i] = src[i] * fscale; +} +#endif + static int dca_subsubframe(DCAContext * s, int base_channel, int block_index) { int k, l; @@ -1161,19 +1175,16 @@ static int dca_subsubframe(DCAContext * s, int base_channel, int block_index) for (l = s->vq_start_subband[k]; l < s->subband_activity[k]; l++) { /* 1 vector -> 32 samples but we only need the 8 samples * for this subsubframe. */ - int m; + int hfvq = s->high_freq_vq[k][l]; if (!s->debug_flag & 0x01) { av_log(s->avctx, AV_LOG_DEBUG, "Stream with high frequencies VQ coding\n"); s->debug_flag |= 0x01; } - for (m = 0; m < 8; m++) { - subband_samples[k][l][m] = - high_freq_vq[s->high_freq_vq[k][l]][subsubframe * 8 + - m] - * (float) s->scale_factor[k][l][0] / 16.0; - } + int8x8_fmul_int32(subband_samples[k][l], + &high_freq_vq[hfvq][subsubframe * 8], + s->scale_factor[k][l][0]); } } diff --git a/libavcodec/dcadata.h b/libavcodec/dcadata.h index ed3ec4e..0a83cdf 100644 --- a/libavcodec/dcadata.h +++ b/libavcodec/dcadata.h @@ -4224,7 +4224,7 @@ static const float lossless_quant_d[32] = { /* Vector quantization tables */ -static const int8_t high_freq_vq[1024][32] = +DECLARE_ALIGNED(8, static const int8_t, high_freq_vq)[1024][32] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, -- 1.7.6.1 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel