Re: [libav-devel] [RFC] Make Bink IDCT take 32-bit coefficients as input

Kostya Wed, 27 Jul 2011 04:39:23 -0700

On Wed, Jul 27, 2011 at 12:20:52PM +0100, Måns Rullgård wrote:
> Kostya <kostya.shish...@gmail.com> writes:
> 
> > On Wed, Jul 27, 2011 at 12:05:56PM +0100, Måns Rullgård wrote:
> >> Kostya <kostya.shish...@gmail.com> writes:
> >> 
> >> > On Sat, Jul 23, 2011 at 08:21:48PM +0200, Kostya wrote:
> >> >> On Sat, Jul 23, 2011 at 07:11:09PM +0100, Måns Rullgård wrote:
> >> >> > Kostya <kostya.shish...@gmail.com> writes:
> >> >> > 
> >> >> > >> >> > diff --git a/libavcodec/binkdsp.c b/libavcodec/binkdsp.c
> >> >> > >> >> > new file mode 100644
> >> >> > >> >> > index 0000000..456d71d
> >> >> > >> >> > --- /dev/null
> >> >> > >> >> > +++ b/libavcodec/binkdsp.c
> >> >> > >> >> 
> >> >> > >> >> [...]
> >> >> > >> >> 
> >> >> > >> >> > +static void bink_clear_block_c(int32_t *block)
> >> >> > >> >> > +{
> >> >> > >> >> > +    memset(block, 0, sizeof(*block) * 64);
> >> >> > >> >> > +}
> >> >> > >> >> > +
> >> >> > >> >> 
> >> >> > >> >> These almost exist in dsputil...
> >> >> > >> >
> >> >> > >> > But not for such drastic situation.
> >> >> > >> 
> >> >> > >> The 32-bit clear_block() actually exists exactly.  It would be a shame
> >> >> > >> not to use it.
> >> >> > >
> >> >> > > It's a pity you have to select high bits_per_sample to use it and then it
> >> >> > > screws other functions like put/get_pixels*
> >> >> > 
> >> >> > Enabling 32-bit "DCTELEM" with 8-bit pixels is a 2-line patch to remove
> >> >> > an #if.  I disabled that variant since nothing needed it at the time.
> >> >> > Keeping the 16-bit version for add/put_pixels is harder.  Ideas for a
> >> >> > clean solution welcome.  One possibility is to use two DSPContexts, or
> >> >> > initialise a scratch context and pull the needed pointers out of it.
> >> >> 
> >> >> But all of these are hacks :(
> >> >
> >> > Also I'd like to add that I have to use 32-bit clear_block along with 16-bit
> >> > clear_block, so probably the last patch I sent is the (locally) best approach.
> >> 
> >> If that's the only function you need both versions of, you could keep one
> >> clear_block pointer in the bink context and set it from a temp
> >> DSPContext initialised to 32 bits.
> >
> > It is possible but I'll get a bunch of warnings from GCC for doing that
> > (because of different pointer types used and declared in DSPContext). Maybe
> > it's better to replace that function with direct memset() call where it's
> > used?
> 
> I suppose clear_block is meant to be faster if implemented in simd.


Eventually it was - two years ago (73b02e24604961e49a63ca34203d8f6c56612117)

> Since I doubt anyone cares a great deal about bink performance, I
> suppose using memset directly for the 32-bit case here is acceptable.

Here's an updated patch.

From beddf62b870621ddaf83a0ae9fc3e0bb7ba8e88b Mon Sep 17 00:00:00 2001
From: Kostya Shishkov <kostya.shish...@gmail.com>
Date: Sat, 23 Jul 2011 15:46:35 +0200
Subject: [PATCH] Make Bink DCT take 32-bit input

Since an IDCT transforming 32-bit input to 8-bit output is unusual and
impractical for most codecs, move the Bink IDCT into a separate context. Get rid
of an additional permutation table while at it, since SIMD support for DCT is
unlikely to be implemented in the foreseeable future.
The quantisation tables also have to change to a signed type for proper
quantisation of DCT coefficients.
---
 libavcodec/Makefile   |    2 +-
 libavcodec/bink.c     |   59 +++++++++++----------
 libavcodec/binkdata.h |    4 +-
 libavcodec/binkdsp.c  |  140 +++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/binkdsp.h  |   41 ++++++++++++++
 libavcodec/binkidct.c |  112 ---------------------------------------
 libavcodec/dsputil.c  |    5 --
 7 files changed, 215 insertions(+), 148 deletions(-)
 create mode 100644 libavcodec/binkdsp.c
 create mode 100644 libavcodec/binkdsp.h
 delete mode 100644 libavcodec/binkidct.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 99ecbbf..36e07a9 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -89,7 +89,7 @@ OBJS-$(CONFIG_AURA2_DECODER)           += aura.o
 OBJS-$(CONFIG_AVS_DECODER)             += avs.o
 OBJS-$(CONFIG_BETHSOFTVID_DECODER)     += bethsoftvideo.o
 OBJS-$(CONFIG_BFI_DECODER)             += bfi.o
-OBJS-$(CONFIG_BINK_DECODER)            += bink.o binkidct.o
+OBJS-$(CONFIG_BINK_DECODER)            += bink.o binkdsp.o
 OBJS-$(CONFIG_BINKAUDIO_DCT_DECODER)   += binkaudio.o wma.o
 OBJS-$(CONFIG_BINKAUDIO_RDFT_DECODER)  += binkaudio.o wma.o
 OBJS-$(CONFIG_BMP_DECODER)             += bmp.o msrledec.o
diff --git a/libavcodec/bink.c b/libavcodec/bink.c
index e085aa5..d168fdf 100644
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c
@@ -24,6 +24,7 @@
 #include "avcodec.h"
 #include "dsputil.h"
 #include "binkdata.h"
+#include "binkdsp.h"
 #include "mathops.h"
 
 #define ALT_BITSTREAM_READER_LE
@@ -60,8 +61,8 @@ static const int binkb_bundle_signed[BINKB_NB_SRC] = {
     0, 0, 0, 1, 1, 0, 1, 0, 0, 0
 };
 
-static uint32_t binkb_intra_quant[16][64];
-static uint32_t binkb_inter_quant[16][64];
+static int32_t binkb_intra_quant[16][64];
+static int32_t binkb_inter_quant[16][64];
 
 /**
  * IDs for different data types used in Bink video codec
@@ -109,11 +110,11 @@ typedef struct Bundle {
 typedef struct BinkContext {
     AVCodecContext *avctx;
     DSPContext     dsp;
+    BinkDSPContext bdsp;
     AVFrame        pic, last;
     int            version;              ///< internal Bink file version
     int            has_alpha;
     int            swap_planes;
-    ScanTable      scantable;            ///< permutated scantable for DCT coeffs decoding
 
     Bundle         bundle[BINKB_NB_SRC]; ///< bundles for decoding all data types
     Tree           col_high[16];         ///< trees for decoding high nibble in "colours" data type
@@ -580,8 +581,8 @@ static inline int binkb_get_value(BinkContext *c, int bundle_num)
  * @param quant_matrices quantization matrices
  * @return 0 for success, negative value in other cases
  */
-static int read_dct_coeffs(GetBitContext *gb, DCTELEM block[64], const uint8_t *scan,
-                           const uint32_t quant_matrices[16][64], int q)
+static int read_dct_coeffs(GetBitContext *gb, int32_t block[64], const uint8_t *scan,
+                           const int32_t quant_matrices[16][64], int q)
 {
     int coef_list[128];
     int mode_list[128];
@@ -590,7 +591,7 @@ static int read_dct_coeffs(GetBitContext *gb, DCTELEM block[64], const uint8_t *
     int coef_count = 0;
     int coef_idx[64];
     int quant_idx;
-    const uint32_t *quant;
+    const int32_t *quant;
 
     coef_list[list_end] = 4;  mode_list[list_end++] = 0;
     coef_list[list_end] = 24; mode_list[list_end++] = 0;
@@ -791,6 +792,7 @@ static int binkb_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
     const uint8_t *scan;
     int xoff, yoff;
     LOCAL_ALIGNED_16(DCTELEM, block, [64]);
+    LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
     int coordmap[64];
     int ybias = is_key ? -15 : 0;
     int qp;
@@ -845,11 +847,11 @@ static int binkb_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
                     dst[coordmap[*scan++]] = binkb_get_value(c, BINKB_SRC_COLORS);
                 break;
             case 2:
-                c->dsp.clear_block(block);
-                block[0] = binkb_get_value(c, BINKB_SRC_INTRA_DC);
+                memset(dctblock, 0, sizeof(*dctblock) * 64);
+                dctblock[0] = binkb_get_value(c, BINKB_SRC_INTRA_DC);
                 qp = binkb_get_value(c, BINKB_SRC_INTRA_Q);
-                read_dct_coeffs(gb, block, c->scantable.permutated, binkb_intra_quant, qp);
-                c->dsp.idct_put(dst, stride, block);
+                read_dct_coeffs(gb, dctblock, bink_scan, binkb_intra_quant, qp);
+                c->bdsp.idct_put(dst, stride, dctblock);
                 break;
             case 3:
                 xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
@@ -878,11 +880,11 @@ static int binkb_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
                 } else {
                     put_pixels8x8_overlapped(dst, ref, stride);
                 }
-                c->dsp.clear_block(block);
-                block[0] = binkb_get_value(c, BINKB_SRC_INTER_DC);
+                memset(dctblock, 0, sizeof(*dctblock) * 64);
+                dctblock[0] = binkb_get_value(c, BINKB_SRC_INTER_DC);
                 qp = binkb_get_value(c, BINKB_SRC_INTER_Q);
-                read_dct_coeffs(gb, block, c->scantable.permutated, binkb_inter_quant, qp);
-                c->dsp.idct_add(dst, stride, block);
+                read_dct_coeffs(gb, dctblock, bink_scan, binkb_inter_quant, qp);
+                c->bdsp.idct_add(dst, stride, dctblock);
                 break;
             case 5:
                 v = binkb_get_value(c, BINKB_SRC_COLORS);
@@ -937,6 +939,7 @@ static int bink_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
     int xoff, yoff;
     LOCAL_ALIGNED_16(DCTELEM, block, [64]);
     LOCAL_ALIGNED_16(uint8_t, ublock, [64]);
+    LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
     int coordmap[64];
 
     const int stride = c->pic.linesize[plane_idx];
@@ -1019,11 +1022,11 @@ static int bink_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
                         ublock[*scan++] = get_value(c, BINK_SRC_COLORS);
                     break;
                 case INTRA_BLOCK:
-                    c->dsp.clear_block(block);
-                    block[0] = get_value(c, BINK_SRC_INTRA_DC);
-                    read_dct_coeffs(gb, block, c->scantable.permutated, bink_intra_quant, -1);
-                    c->dsp.idct(block);
-                    c->dsp.put_pixels_nonclamped(block, ublock, 8);
+                    memset(dctblock, 0, sizeof(*dctblock) * 64);
+                    dctblock[0] = get_value(c, BINK_SRC_INTRA_DC);
+                    read_dct_coeffs(gb, dctblock, bink_scan, bink_intra_quant, -1);
+                    c->bdsp.idct(dctblock);
+                    c->bdsp.put_block(dctblock, ublock);
                     break;
                 case FILL_BLOCK:
                     v = get_value(c, BINK_SRC_COLORS);
@@ -1103,10 +1106,10 @@ static int bink_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
                 c->dsp.add_pixels8(dst, block, stride);
                 break;
             case INTRA_BLOCK:
-                c->dsp.clear_block(block);
-                block[0] = get_value(c, BINK_SRC_INTRA_DC);
-                read_dct_coeffs(gb, block, c->scantable.permutated, bink_intra_quant, -1);
-                c->dsp.idct_put(dst, stride, block);
+                memset(dctblock, 0, sizeof(*dctblock) * 64);
+                dctblock[0] = get_value(c, BINK_SRC_INTRA_DC);
+                read_dct_coeffs(gb, dctblock, bink_scan, bink_intra_quant, -1);
+                c->bdsp.idct_put(dst, stride, dctblock);
                 break;
             case FILL_BLOCK:
                 v = get_value(c, BINK_SRC_COLORS);
@@ -1117,10 +1120,10 @@ static int bink_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
                 yoff = get_value(c, BINK_SRC_Y_OFF);
                 ref = prev + xoff + yoff * stride;
                 c->dsp.put_pixels_tab[1][0](dst, ref, stride, 8);
-                c->dsp.clear_block(block);
-                block[0] = get_value(c, BINK_SRC_INTER_DC);
-                read_dct_coeffs(gb, block, c->scantable.permutated, bink_inter_quant, -1);
-                c->dsp.idct_add(dst, stride, block);
+                memset(dctblock, 0, sizeof(*dctblock) * 64);
+                dctblock[0] = get_value(c, BINK_SRC_INTER_DC);
+                read_dct_coeffs(gb, dctblock, bink_scan, bink_inter_quant, -1);
+                c->bdsp.idct_add(dst, stride, dctblock);
                 break;
             case PATTERN_BLOCK:
                 for (i = 0; i < 2; i++)
@@ -1288,7 +1291,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     avctx->idct_algo = FF_IDCT_BINK;
     dsputil_init(&c->dsp, avctx);
-    ff_init_scantable(c->dsp.idct_permutation, &c->scantable, bink_scan);
+    ff_binkdsp_init(&c->bdsp);
 
     init_bundles(c);
 
diff --git a/libavcodec/binkdata.h b/libavcodec/binkdata.h
index db289ad..60f0a59 100644
--- a/libavcodec/binkdata.h
+++ b/libavcodec/binkdata.h
@@ -285,7 +285,7 @@ static const uint8_t bink_patterns[16][64] = {
     }
 };
 
-static const uint32_t bink_intra_quant[16][64] = {
+static const int32_t bink_intra_quant[16][64] = {
 {
  0x010000, 0x016315, 0x01E83D, 0x02A535, 0x014E7B, 0x016577, 0x02F1E6, 0x02724C,
  0x010000, 0x00EEDA, 0x024102, 0x017F9B, 0x00BE80, 0x00611E, 0x01083C, 0x00A552,
@@ -448,7 +448,7 @@ static const uint32_t bink_intra_quant[16][64] = {
 },
 };
 
-static const uint32_t bink_inter_quant[16][64] = {
+static const int32_t bink_inter_quant[16][64] = {
 {
  0x010000, 0x017946, 0x01A5A9, 0x0248DC, 0x016363, 0x0152A7, 0x0243EC, 0x0209EA,
  0x012000, 0x00E248, 0x01BBDA, 0x015CBC, 0x00A486, 0x0053E0, 0x00F036, 0x008095,
diff --git a/libavcodec/binkdsp.c b/libavcodec/binkdsp.c
new file mode 100644
index 0000000..cd1aacd
--- /dev/null
+++ b/libavcodec/binkdsp.c
@@ -0,0 +1,140 @@
+/*
+ * Bink DSP routines
+ * Copyright (c) 2009 Kostya Shishkov
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Bink DSP routines
+ */
+
+#include "dsputil.h"
+#include "binkdsp.h"
+
+#define A1  2896 /* (1/sqrt(2))<<12 */
+#define A2  2217
+#define A3  3784
+#define A4 -5352
+
+#define IDCT_TRANSFORM(dest,s0,s1,s2,s3,s4,s5,s6,s7,d0,d1,d2,d3,d4,d5,d6,d7,munge,src) {\
+    const int a0 = (src)[s0] + (src)[s4]; \
+    const int a1 = (src)[s0] - (src)[s4]; \
+    const int a2 = (src)[s2] + (src)[s6]; \
+    const int a3 = (A1*((src)[s2] - (src)[s6])) >> 11; \
+    const int a4 = (src)[s5] + (src)[s3]; \
+    const int a5 = (src)[s5] - (src)[s3]; \
+    const int a6 = (src)[s1] + (src)[s7]; \
+    const int a7 = (src)[s1] - (src)[s7]; \
+    const int b0 = a4 + a6; \
+    const int b1 = (A3*(a5 + a7)) >> 11; \
+    const int b2 = ((A4*a5) >> 11) - b0 + b1; \
+    const int b3 = (A1*(a6 - a4) >> 11) - b2; \
+    const int b4 = ((A2*a7) >> 11) + b3 - b1; \
+    (dest)[d0] = munge(a0+a2   +b0); \
+    (dest)[d1] = munge(a1+a3-a2+b2); \
+    (dest)[d2] = munge(a1-a3+a2+b3); \
+    (dest)[d3] = munge(a0-a2   -b4); \
+    (dest)[d4] = munge(a0-a2   +b4); \
+    (dest)[d5] = munge(a1-a3+a2-b3); \
+    (dest)[d6] = munge(a1+a3-a2-b2); \
+    (dest)[d7] = munge(a0+a2   -b0); \
+}
+/* end IDCT_TRANSFORM macro */
+
+#define MUNGE_NONE(x) (x)
+#define IDCT_COL(dest,src) IDCT_TRANSFORM(dest,0,8,16,24,32,40,48,56,0,8,16,24,32,40,48,56,MUNGE_NONE,src)
+
+#define MUNGE_ROW(x) (((x) + 0x7F)>>8)
+#define IDCT_ROW(dest,src) IDCT_TRANSFORM(dest,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,MUNGE_ROW,src)
+
+static inline void bink_idct_col(int *dest, const int32_t *src)
+{
+    if ((src[8]|src[16]|src[24]|src[32]|src[40]|src[48]|src[56])==0) {
+        dest[0]  =
+        dest[8]  =
+        dest[16] =
+        dest[24] =
+        dest[32] =
+        dest[40] =
+        dest[48] =
+        dest[56] = src[0];
+    } else {
+        IDCT_COL(dest, src);
+    }
+}
+
+static void bink_idct_c(int32_t *block)
+{
+    int i;
+    int temp[64];
+
+    for (i = 0; i < 8; i++)
+        bink_idct_col(&temp[i], &block[i]);
+    for (i = 0; i < 8; i++) {
+        IDCT_ROW( (&block[8*i]), (&temp[8*i]) );
+    }
+}
+
+static void bink_idct_add_c(uint8_t *dest, int linesize, int32_t *block)
+{
+    int i, j;
+
+    bink_idct_c(block);
+    for (i = 0; i < 8; i++, dest += linesize, block += 8)
+        for (j = 0; j < 8; j++)
+             dest[j] += block[j];
+}
+
+static void bink_idct_put_c(uint8_t *dest, int linesize, int32_t *block)
+{
+    int i;
+    int temp[64];
+    for (i = 0; i < 8; i++)
+        bink_idct_col(&temp[i], &block[i]);
+    for (i = 0; i < 8; i++) {
+        IDCT_ROW( (&dest[i*linesize]), (&temp[8*i]) );
+    }
+}
+
+static void bink_put_block_c(const int32_t *block, uint8_t *restrict pixels)
+{
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        pixels[0] = block[0];
+        pixels[1] = block[1];
+        pixels[2] = block[2];
+        pixels[3] = block[3];
+        pixels[4] = block[4];
+        pixels[5] = block[5];
+        pixels[6] = block[6];
+        pixels[7] = block[7];
+
+        pixels += 8;
+        block  += 8;
+    }
+}
+
+void ff_binkdsp_init(BinkDSPContext *c)
+{
+    c->idct        = bink_idct_c;
+    c->idct_add    = bink_idct_add_c;
+    c->idct_put    = bink_idct_put_c;
+    c->put_block   = bink_put_block_c;
+}
diff --git a/libavcodec/binkdsp.h b/libavcodec/binkdsp.h
new file mode 100644
index 0000000..65e7a4f
--- /dev/null
+++ b/libavcodec/binkdsp.h
@@ -0,0 +1,41 @@
+/*
+ * Bink DSP routines
+ * Copyright (c) 2009 Kostya Shishkov
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Bink DSP routines
+ */
+
+#ifndef AVCODEC_BINKDSP_H
+#define AVCODEC_BINKDSP_H
+
+#include "dsputil.h"
+
+typedef struct BinkDSPContext {
+    void (*idct)(int32_t *block/* align 16*/);
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, int32_t *block/*align 16*/);
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, int32_t *block/*align 16*/);
+    void (*put_block)(const int32_t *block/*align 16*/, uint8_t *pixels/*align 8*/);
+} BinkDSPContext;
+
+void ff_binkdsp_init(BinkDSPContext *c);
+
+#endif /* AVCODEC_BINKDSP_H */
diff --git a/libavcodec/binkidct.c b/libavcodec/binkidct.c
deleted file mode 100644
index 2326a61..0000000
--- a/libavcodec/binkidct.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Bink IDCT algorithm
- * Copyright (c) 2009 Kostya Shishkov
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Bink IDCT algorithm
- */
-
-#include "dsputil.h"
-
-#define A1  2896 /* (1/sqrt(2))<<12 */
-#define A2  2217
-#define A3  3784
-#define A4 -5352
-
-#define IDCT_TRANSFORM(dest,s0,s1,s2,s3,s4,s5,s6,s7,d0,d1,d2,d3,d4,d5,d6,d7,munge,src) {\
-    const int a0 = (src)[s0] + (src)[s4]; \
-    const int a1 = (src)[s0] - (src)[s4]; \
-    const int a2 = (src)[s2] + (src)[s6]; \
-    const int a3 = (A1*((src)[s2] - (src)[s6])) >> 11; \
-    const int a4 = (src)[s5] + (src)[s3]; \
-    const int a5 = (src)[s5] - (src)[s3]; \
-    const int a6 = (src)[s1] + (src)[s7]; \
-    const int a7 = (src)[s1] - (src)[s7]; \
-    const int b0 = a4 + a6; \
-    const int b1 = (A3*(a5 + a7)) >> 11; \
-    const int b2 = ((A4*a5) >> 11) - b0 + b1; \
-    const int b3 = (A1*(a6 - a4) >> 11) - b2; \
-    const int b4 = ((A2*a7) >> 11) + b3 - b1; \
-    (dest)[d0] = munge(a0+a2   +b0); \
-    (dest)[d1] = munge(a1+a3-a2+b2); \
-    (dest)[d2] = munge(a1-a3+a2+b3); \
-    (dest)[d3] = munge(a0-a2   -b4); \
-    (dest)[d4] = munge(a0-a2   +b4); \
-    (dest)[d5] = munge(a1-a3+a2-b3); \
-    (dest)[d6] = munge(a1+a3-a2-b2); \
-    (dest)[d7] = munge(a0+a2   -b0); \
-}
-/* end IDCT_TRANSFORM macro */
-
-#define MUNGE_NONE(x) (x)
-#define IDCT_COL(dest,src) IDCT_TRANSFORM(dest,0,8,16,24,32,40,48,56,0,8,16,24,32,40,48,56,MUNGE_NONE,src)
-
-#define MUNGE_ROW(x) (((x) + 0x7F)>>8)
-#define IDCT_ROW(dest,src) IDCT_TRANSFORM(dest,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,MUNGE_ROW,src)
-
-static inline void bink_idct_col(int *dest, const DCTELEM *src)
-{
-    if ((src[8]|src[16]|src[24]|src[32]|src[40]|src[48]|src[56])==0) {
-        dest[0]  =
-        dest[8]  =
-        dest[16] =
-        dest[24] =
-        dest[32] =
-        dest[40] =
-        dest[48] =
-        dest[56] = src[0];
-    } else {
-        IDCT_COL(dest, src);
-    }
-}
-
-void ff_bink_idct_c(DCTELEM *block)
-{
-    int i;
-    int temp[64];
-
-    for (i = 0; i < 8; i++)
-        bink_idct_col(&temp[i], &block[i]);
-    for (i = 0; i < 8; i++) {
-        IDCT_ROW( (&block[8*i]), (&temp[8*i]) );
-    }
-}
-
-void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block)
-{
-    int i, j;
-
-    ff_bink_idct_c(block);
-    for (i = 0; i < 8; i++, dest += linesize, block += 8)
-        for (j = 0; j < 8; j++)
-             dest[j] += block[j];
-}
-
-void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block)
-{
-    int i;
-    int temp[64];
-    for (i = 0; i < 8; i++)
-        bink_idct_col(&temp[i], &block[i]);
-    for (i = 0; i < 8; i++) {
-        IDCT_ROW( (&dest[i*linesize]), (&temp[8*i]) );
-    }
-}
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 09e58f4..04c2ff6 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2894,11 +2894,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
         }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
             c->idct_put= ff_ea_idct_put_c;
             c->idct_permutation_type= FF_NO_IDCT_PERM;
-        }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
-            c->idct     = ff_bink_idct_c;
-            c->idct_add = ff_bink_idct_add_c;
-            c->idct_put = ff_bink_idct_put_c;
-            c->idct_permutation_type = FF_NO_IDCT_PERM;
         }else{ //accurate/default
             c->idct_put = ff_simple_idct_put_8;
             c->idct_add = ff_simple_idct_add_8;
-- 
1.7.0.4

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [RFC] Make Bink IDCT take 32-bit coefficients as input

Reply via email to