Before:-
Average Decicycles : 2383529
Min Decicycles : 1978455
MMX :-
Average Decicycles : 782737
Min Decicycles : 671786
SSE2 :-
Average Decicycles : 656058
Min Decicycles : 572812
---
libavcodec/tta.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/libavcodec/tta.c b/libavcodec/tta.c
index acaeaaa..140a3de 100644
--- a/libavcodec/tta.c
+++ b/libavcodec/tta.c
@@ -30,6 +30,7 @@
#define BITSTREAM_READER_LE
//#define DEBUG
#include <limits.h>
+#include "fmtconvert.h"
#include "avcodec.h"
#include "get_bits.h"
#include "libavutil/crc.h"
@@ -67,6 +68,7 @@ typedef struct TTAContext {
int32_t *decode_buffer;
TTAChannel *ch_ctx;
+ FmtConvertContext fmt_conv; ///< optimized conversion functions
} TTAContext;
static const uint32_t shift_1[] = {
@@ -285,7 +287,8 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
}
if (s->bps == 2) {
- s->decode_buffer = av_mallocz(sizeof(int32_t)*s->frame_length*s->channels);
+ s->decode_buffer = av_mallocz(FFALIGN(s->frame_length, 8) *
+ s->channels * sizeof(*s->decode_buffer));
if (!s->decode_buffer)
return AVERROR(ENOMEM);
}
@@ -299,6 +302,7 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
return -1;
}
+ ff_fmt_convert_init(&s->fmt_conv, avctx);
avcodec_get_frame_defaults(&s->frame);
avctx->coded_frame = &s->frame;
@@ -313,6 +317,7 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
TTAContext *s = avctx->priv_data;
int i, ret;
int cur_chan = 0, framelen = s->frame_length;
+ int samples_aligned;
int32_t *p;
if (avctx->err_recognition & AV_EF_CRCCHECK) {
@@ -328,11 +333,13 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
framelen = s->last_frame_length;
/* get output buffer */
- s->frame.nb_samples = framelen;
+ samples_aligned = FFALIGN(framelen, 8);
+ s->frame.nb_samples = samples_aligned;
if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return ret;
}
+ s->frame.nb_samples = framelen;
// decode directly to output buffer for 24-bit sample format
if (s->bps == 3)
@@ -433,8 +440,7 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
// convert to output buffer
if (s->bps == 2) {
int16_t *samples = (int16_t *)s->frame.data[0];
- for (p = s->decode_buffer; p < s->decode_buffer + (framelen * s->channels); p++)
- *samples++ = *p;
+ s->fmt_conv.int32_to_int16_clipped(samples, s->decode_buffer, samples_aligned * s->channels);
} else {
// shift samples for 24-bit sample format
int32_t *samples = (int32_t *)s->frame.data[0];
--
1.7.10
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel