[libav-devel] [PATCH 2/2] tta: Use optimized int32 to int16 function.

Aneesh Dogra Wed, 09 May 2012 23:01:07 -0700

Benchmark results (before the patch vs. the optimized int32-to-int16 conversion):

Before:

Average decicycles: 2383529
Min     decicycles: 1978455

MMX:

Average decicycles: 782737
Min     decicycles: 671786

SSE2:

Average decicycles: 656058
Min     decicycles: 572812
---
 libavcodec/tta.c |   14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/libavcodec/tta.c b/libavcodec/tta.c
index acaeaaa..140a3de 100644
--- a/libavcodec/tta.c
+++ b/libavcodec/tta.c
@@ -30,6 +30,7 @@
 #define BITSTREAM_READER_LE
 //#define DEBUG
 #include <limits.h>
+#include "fmtconvert.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "libavutil/crc.h"
@@ -67,6 +68,7 @@ typedef struct TTAContext {
     int32_t *decode_buffer;
 
     TTAChannel *ch_ctx;
+    FmtConvertContext fmt_conv;             ///< optimized conversion functions
 } TTAContext;
 
 static const uint32_t shift_1[] = {
@@ -285,7 +287,8 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
         }
 
         if (s->bps == 2) {
-            s->decode_buffer = av_mallocz(sizeof(int32_t)*s->frame_length*s->channels);
+            s->decode_buffer = av_mallocz(FFALIGN(s->frame_length, 8) *
+                               s->channels * sizeof(*s->decode_buffer));
             if (!s->decode_buffer)
                 return AVERROR(ENOMEM);
         }
@@ -299,6 +302,7 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
         return -1;
     }
 
+    ff_fmt_convert_init(&s->fmt_conv, avctx);
     avcodec_get_frame_defaults(&s->frame);
     avctx->coded_frame = &s->frame;
 
@@ -313,6 +317,7 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
     TTAContext *s = avctx->priv_data;
     int i, ret;
     int cur_chan = 0, framelen = s->frame_length;
+    int samples_aligned;
     int32_t *p;
 
     if (avctx->err_recognition & AV_EF_CRCCHECK) {
@@ -328,11 +333,13 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
         framelen = s->last_frame_length;
 
     /* get output buffer */
-    s->frame.nb_samples = framelen;
+    samples_aligned     = FFALIGN(framelen, 8);
+    s->frame.nb_samples = samples_aligned;
     if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return ret;
     }
+    s->frame.nb_samples = framelen;
 
     // decode directly to output buffer for 24-bit sample format
     if (s->bps == 3)
@@ -433,8 +440,7 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
     // convert to output buffer
     if (s->bps == 2) {
         int16_t *samples = (int16_t *)s->frame.data[0];
-        for (p = s->decode_buffer; p < s->decode_buffer + (framelen * s->channels); p++)
-            *samples++ = *p;
+        s->fmt_conv.int32_to_int16_clipped(samples, s->decode_buffer, samples_aligned * s->channels);
     } else {
         // shift samples for 24-bit sample format
         int32_t *samples = (int32_t *)s->frame.data[0];
-- 
1.7.10

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] tta: Use optimized int32 to int16 function.

Reply via email to