[libav-devel] [PATCH 1/3] prores: extract idct into its own dspcontext and merge with put_pixels.

Ronald S. Bultje Mon, 03 Oct 2011 11:37:49 -0700

From: "Ronald S. Bultje" <[email protected]>

---
 libavcodec/Makefile    |    2 +-
 libavcodec/dsputil.c   |   65 +++++++++++++++++++++----------------
 libavcodec/proresdec.c |   83 +++++++++++++-----------------------------------
 libavcodec/proresdsp.c |   61 +++++++++++++++++++++++++++++++++++
 libavcodec/proresdsp.h |   38 ++++++++++++++++++++++
 5 files changed, 159 insertions(+), 90 deletions(-)
 create mode 100644 libavcodec/proresdsp.c
 create mode 100644 libavcodec/proresdsp.h


diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3c4e2f8..b7b5124 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o
 OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
-OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o
+OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdsp.o
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
 OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
                                           celp_filters.o acelp_vectors.o \
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 967406e..e248516 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
     }
 }
 
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                   int idct_permutation_type)
+{
+    int i;
+
+    switch(idct_permutation_type){ /* fill all 64 entries for the requested FF_*_IDCT_PERM type */
+        case FF_NO_IDCT_PERM: /* identity */
+            for(i=0; i<64; i++)
+                idct_permutation[i]= i;
+            break;
+        case FF_LIBMPEG2_IDCT_PERM: /* keep bits 3-5, rotate bits 0-2 right by one */
+            for(i=0; i<64; i++)
+                idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+            break;
+        case FF_SIMPLE_IDCT_PERM: /* straight copy of simple_mmx_permutation[] */
+            for(i=0; i<64; i++)
+                idct_permutation[i]= simple_mmx_permutation[i];
+            break;
+        case FF_TRANSPOSE_IDCT_PERM: /* swap bits 0-2 with bits 3-5 */
+            for(i=0; i<64; i++)
+                idct_permutation[i]= ((i&7)<<3) | (i>>3);
+            break;
+        case FF_PARTTRANS_IDCT_PERM: /* keep bits 2 and 5, swap bits 0-1 with bits 3-4 */
+            for(i=0; i<64; i++)
+                idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
+            break;
+        case FF_SSE2_IDCT_PERM: /* keep bits 3-5, remap bits 0-2 through idct_sse2_row_perm[] */
+            for(i=0; i<64; i++)
+                idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
+            break;
+        default: /* unknown type: the table is left untouched, only an error is logged */
+            av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
+    }
+}
+
 static int pix_sum_c(uint8_t * pix, int line_size)
 {
     int s, i, j;
@@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
             c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
     }
 
-    switch(c->idct_permutation_type){
-    case FF_NO_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= i;
-        break;
-    case FF_LIBMPEG2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
-        break;
-    case FF_SIMPLE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= simple_mmx_permutation[i];
-        break;
-    case FF_TRANSPOSE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
-        break;
-    case FF_PARTTRANS_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
-        break;
-    case FF_SSE2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
-        break;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
-    }
+    ff_init_scantable_permutation(c->idct_permutation,
+                                  c->idct_permutation_type);
 }
diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index c70d145..0424093 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -34,17 +34,11 @@
 
 #include "libavutil/intmath.h"
 #include "avcodec.h"
-#include "dsputil.h"
+#include "proresdsp.h"
 #include "get_bits.h"
 
-#define BITS_PER_SAMPLE 10                              ///< output precision of that decoder
-#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
-#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
-#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
-
-
 typedef struct {
-    DSPContext dsp;
+    ProresDSPContext dsp;
     AVFrame    picture;
     ScanTable  scantable;
     int        scantable_type;           ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
@@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
 
-    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
-    dsputil_init(&ctx->dsp, avctx);
+    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
+    ff_proresdsp_init(&ctx->dsp);
 
     avctx->coded_frame = &ctx->picture;
     avcodec_get_frame_defaults(&ctx->picture);
@@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
 }
 
 
-#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
-
-/**
- * Add bias value, clamp and output pixels of a slice
- */
-static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
-                       int mbs_per_slice, int blocks_per_mb)
-{
-    int mb, x, y, src_offset, dst_offset;
-    const DCTELEM *src1, *src2;
-    uint16_t *dst1, *dst2;
-
-    src1 = in;
-    src2 = in + (blocks_per_mb << 5);
-    dst1 = out;
-    dst2 = out + (stride << 3);
-
-    for (mb = 0; mb < mbs_per_slice; mb++) {
-        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
-            for (x = 0; x < 8; x++) {
-                src_offset = (y << 3) + x;
-
-                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
-                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
-
-                if (blocks_per_mb > 2) {
-                    dst1[dst_offset + x + 8] =
-                        CLIP_AND_BIAS(src1[src_offset + 64]);
-                    dst2[dst_offset + x + 8] =
-                        CLIP_AND_BIAS(src2[src_offset + 64]);
-                }
-            }
-        }
-
-        src1 += blocks_per_mb << 6;
-        src2 += blocks_per_mb << 6;
-        dst1 += blocks_per_mb << 2;
-        dst2 += blocks_per_mb << 2;
-    }
-}
-
-
 /**
  * Decode a slice plane (luma or chroma).
  */
@@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
 {
     GetBitContext gb;
     DCTELEM *block_ptr;
-    int i, blk_num, blocks_per_slice;
+    int i, j, mb_num, blocks_per_slice;
 
     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 
@@ -518,20 +470,29 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
     /* inverse quantization, inverse transform and output */
     block_ptr = ctx->blocks;
 
-    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) {
+    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
         /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
          * and the input of the inverse transform should be scaled by 2
          * in order to avoid rounding errors.
          * Due to the fact the existing Libav transforms are incompatible with
          * that input I temporally introduced the coarse solution below... */
-        for (i = 0; i < 64; i++)
-            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
-
-        ctx->dsp.idct(block_ptr);
+        for (j = 0; j < blocks_per_mb; j++)
+            for (i = 0; i < 64; i++)
+                block_ptr[j * 64 + i] = (block_ptr[j * 64 + i] * qmat[i]) >> 2;
+
+        ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr);
+        block_ptr += 64;
+        if (blocks_per_mb > 2) {
+            ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr);
+            block_ptr += 64;
+        }
+        ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr);
+        block_ptr += 64;
+        if (blocks_per_mb > 2) {
+            ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr);
+            block_ptr += 64;
+        }
     }
-
-    put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice,
-               blocks_per_mb);
 }
 
 
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
new file mode 100644
index 0000000..7f20c83
--- /dev/null
+++ b/libavcodec/proresdsp.c
@@ -0,0 +1,61 @@
+/*
+ * Apple ProRes compatible decoder
+ *
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "proresdsp.h"
+#include "simple_idct.h"
+
+#define BIAS     (1 << (PRORES_BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
+#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
+#define CLIP_MAX ((1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1) ///< maximum value for clipping resulting pixels (parenthesized so it expands safely inside larger expressions)
+
+#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
+
+/**
+ * Add bias value, clamp and output the pixels of one 8x8 block
+ */
+static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
+{
+    int x, y, src_offset, dst_offset;
+
+    for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) { // stride is counted in uint16_t elements of dst
+        for (x = 0; x < 8; x++) {
+            src_offset = (y << 3) + x; // row-major index into the 64-entry input block
+
+            dst[dst_offset + x] = CLIP_AND_BIAS(in[src_offset]);
+        }
+    }
+}
+
+static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block)
+{
+    ff_simple_idct_10(block);              // transform first; its result is read back from block below
+    put_pixels(out, linesize >> 1, block); // NOTE(review): linesize is presumably in bytes, halved to a uint16_t stride - confirm
+}
+
+void ff_proresdsp_init(ProresDSPContext *dsp)
+{
+    dsp->idct_put = prores_idct_put_c;            // only the C implementation is installed here
+    dsp->idct_permutation_type = FF_NO_IDCT_PERM; // yields the identity permutation table below
+
+    ff_init_scantable_permutation(dsp->idct_permutation,
+                                  dsp->idct_permutation_type);
+}
diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h
new file mode 100644
index 0000000..96a5cb6
--- /dev/null
+++ b/libavcodec/proresdsp.h
@@ -0,0 +1,38 @@
+/*
+ * Apple ProRes compatible decoder
+ *
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef LIBAVCODEC_PRORESDSP_H
+#define LIBAVCODEC_PRORESDSP_H
+
+#include "dsputil.h"
+
+#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of that decoder
+
+typedef struct {
+    int idct_permutation_type; ///< FF_*_IDCT_PERM value selecting how idct_permutation is filled
+    uint8_t idct_permutation[64]; ///< table filled by ff_init_scantable_permutation() during ff_proresdsp_init()
+    void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block); ///< the C version inverse-transforms block and stores the biased, clipped pixels at out
+} ProresDSPContext;
+
+void ff_proresdsp_init(ProresDSPContext *dsp);
+
+#endif /* LIBAVCODEC_PRORESDSP_H */
-- 
1.7.6

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/3] prores: extract idct into its own dspcontext and merge with put_pixels.

Reply via email to