Re: [libav-devel] [PATCH 1/3] prores: extract idct into its own dspcontext and merge with put_pixels.

Ronald S. Bultje Thu, 06 Oct 2011 08:04:53 -0700

Hi,

On Thu, Oct 6, 2011 at 2:19 PM, Kostya Shishkov
<[email protected]> wrote:
> On Tue, Oct 04, 2011 at 06:33:01AM +0000, Ronald S. Bultje wrote:
>> Hi,
>>
>> On Mon, Oct 3, 2011 at 6:59 PM, Diego Biurrun <[email protected]> wrote:
>> > On Mon, Oct 03, 2011 at 11:37:35AM -0700, Ronald S. Bultje wrote:
>> >> From: "Ronald S. Bultje" <[email protected]>
>> >>
>> >> --- a/libavcodec/dsputil.c
>> >> +++ b/libavcodec/dsputil.c
>> >> @@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, 
>> >> ScanTable *st, const uint8_t *src_s
>> >>
>> >> +void ff_init_scantable_permutation(uint8_t *idct_permutation,
>> >> +                                   int idct_permutation_type)
>> >> +{
>> >> +    int i;
>> >> +
>> >> +    switch(idct_permutation_type){
>> >> +        case FF_NO_IDCT_PERM:
>> >> +            for(i=0; i<64; i++)
>> >> +                idct_permutation[i]= i;
>> >> +            break;
>> >> @@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, 
>> >> AVCodecContext *avctx)
>> >>
>> >> -    switch(c->idct_permutation_type){
>> >> -    case FF_NO_IDCT_PERM:
>> >> -        for(i=0; i<64; i++)
>> >> -            c->idct_permutation[i]= i;
>> >> -        break;
>> >
>> > Please maintain K&R style and keep the case statements at the indentation
>> > level as the switch.
>> >
>> >> --- /dev/null
>> >> +++ b/libavcodec/proresdsp.h
>> >> @@ -0,0 +1,38 @@
>> >> +
>> >> +#ifndef LIBAVCODEC_PRORESDSP_H
>> >> +#define LIBAVCODEC_PRORESDSP_H
>> >
>> > AVCODEC_PRORESDSP_H
>> >
>> >> +#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of that decoder
>> >
>> > "that"?  Just write "prores"...
>>
>> All fixed, see attached.
>>
>> Ronald
>
>>
>> Return-Path: <[email protected]>
>> Received: from localhost.localdomain (dhcp-172-22-79-63.mtv.corp.google.com 
>> [172.22.79.63])
>>         by mx.google.com with ESMTPS id 
>> ji3sm56244687pbc.2.2011.10.03.11.37.41
>>         (version=TLSv1/SSLv3 cipher=OTHER);
>>         Mon, 03 Oct 2011 11:37:41 -0700 (PDT)
>> From: "Ronald S. Bultje" <[email protected]>
>> To: [email protected]
>> Cc: "Ronald S. Bultje" <[email protected]>
>> Subject: [PATCH 1/3] prores: extract idct into its own dspcontext and merge 
>> with put_pixels.
>> Date: Mon,  3 Oct 2011 11:37:35 -0700
>> Message-Id: <[email protected]>
>> X-Mailer: git-send-email 1.7.6
>>
>> From: "Ronald S. Bultje" <[email protected]>
>>
>> ---
>>  libavcodec/Makefile    |    2 +-
>>  libavcodec/dsputil.c   |   65 +++++++++++++++++++++----------------
>>  libavcodec/proresdec.c |   83 
>> +++++++++++++-----------------------------------
>>  libavcodec/proresdsp.c |   61 +++++++++++++++++++++++++++++++++++
>>  libavcodec/proresdsp.h |   38 ++++++++++++++++++++++
>>  5 files changed, 159 insertions(+), 90 deletions(-)
>>  create mode 100644 libavcodec/proresdsp.c
>>  create mode 100644 libavcodec/proresdsp.h
>>
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index 3c4e2f8..b7b5124 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o
>>  OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
>>  OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
>>  OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
>> -OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o
>> +OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdsp.o
>>  OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
>>  OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
>>                                            celp_filters.o acelp_vectors.o \
>> diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
>> index 967406e..e248516 100644
>> --- a/libavcodec/dsputil.c
>> +++ b/libavcodec/dsputil.c
>> @@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable 
>> *st, const uint8_t *src_s
>>      }
>>  }
>>
>> +void ff_init_scantable_permutation(uint8_t *idct_permutation,
>> +                                   int idct_permutation_type)
>> +{
>> +    int i;
>> +
>> +    switch(idct_permutation_type){
>> +    case FF_NO_IDCT_PERM:
>> +        for(i=0; i<64; i++)
>> +            idct_permutation[i]= i;
>> +        break;
>> +    case FF_LIBMPEG2_IDCT_PERM:
>> +        for(i=0; i<64; i++)
>> +            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 
>> 2);
>> +        break;
>> +    case FF_SIMPLE_IDCT_PERM:
>> +        for(i=0; i<64; i++)
>> +            idct_permutation[i]= simple_mmx_permutation[i];
>> +        break;
>> +    case FF_TRANSPOSE_IDCT_PERM:
>> +        for(i=0; i<64; i++)
>> +            idct_permutation[i]= ((i&7)<<3) | (i>>3);
>> +        break;
>> +    case FF_PARTTRANS_IDCT_PERM:
>> +        for(i=0; i<64; i++)
>> +            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
>> +        break;
>> +    case FF_SSE2_IDCT_PERM:
>> +        for(i=0; i<64; i++)
>> +            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
>> +        break;
>> +    default:
>> +        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not 
>> set\n");
>> +    }
>> +}
>> +
>>  static int pix_sum_c(uint8_t * pix, int line_size)
>>  {
>>      int s, i, j;
>> @@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, 
>> AVCodecContext *avctx)
>>              c->avg_2tap_qpel_pixels_tab[0][i]= 
>> c->avg_h264_qpel_pixels_tab[0][i];
>>      }
>>
>> -    switch(c->idct_permutation_type){
>> -    case FF_NO_IDCT_PERM:
>> -        for(i=0; i<64; i++)
>> -            c->idct_permutation[i]= i;
>> -        break;
>> -    case FF_LIBMPEG2_IDCT_PERM:
>> -        for(i=0; i<64; i++)
>> -            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) 
>> << 2);
>> -        break;
>> -    case FF_SIMPLE_IDCT_PERM:
>> -        for(i=0; i<64; i++)
>> -            c->idct_permutation[i]= simple_mmx_permutation[i];
>> -        break;
>> -    case FF_TRANSPOSE_IDCT_PERM:
>> -        for(i=0; i<64; i++)
>> -            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
>> -        break;
>> -    case FF_PARTTRANS_IDCT_PERM:
>> -        for(i=0; i<64; i++)
>> -            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
>> -        break;
>> -    case FF_SSE2_IDCT_PERM:
>> -        for(i=0; i<64; i++)
>> -            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
>> -        break;
>> -    default:
>> -        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not 
>> set\n");
>> -    }
>> +    ff_init_scantable_permutation(c->idct_permutation,
>> +                                  c->idct_permutation_type);
>>  }
>> diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
>> index c70d145..0424093 100644
>> --- a/libavcodec/proresdec.c
>> +++ b/libavcodec/proresdec.c
>> @@ -34,17 +34,11 @@
>>
>>  #include "libavutil/intmath.h"
>>  #include "avcodec.h"
>> -#include "dsputil.h"
>> +#include "proresdsp.h"
>>  #include "get_bits.h"
>>
>> -#define BITS_PER_SAMPLE 10                              ///< output 
>> precision of that decoder
>> -#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for 
>> converting signed pixels into unsigned ones
>> -#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value 
>> for clipping resulting pixels
>> -#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value 
>> for clipping resulting pixels
>> -
>> -
>>  typedef struct {
>> -    DSPContext dsp;
>> +    ProresDSPContext dsp;
>>      AVFrame    picture;
>>      ScanTable  scantable;
>>      int        scantable_type;           ///< -1 = uninitialized, 0 = 
>> progressive, 1/2 = interlaced
>> @@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
>>
>>      avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
>>
>> -    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
>> -    dsputil_init(&ctx->dsp, avctx);
>> +    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
>> +    ff_proresdsp_init(&ctx->dsp);
>>
>>      avctx->coded_frame = &ctx->picture;
>>      avcodec_get_frame_defaults(&ctx->picture);
>> @@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, 
>> DCTELEM *out,
>>  }
>>
>>
>> -#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
>> -
>> -/**
>> - * Add bias value, clamp and output pixels of a slice
>> - */
>> -static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
>> -                       int mbs_per_slice, int blocks_per_mb)
>> -{
>> -    int mb, x, y, src_offset, dst_offset;
>> -    const DCTELEM *src1, *src2;
>> -    uint16_t *dst1, *dst2;
>> -
>> -    src1 = in;
>> -    src2 = in + (blocks_per_mb << 5);
>> -    dst1 = out;
>> -    dst2 = out + (stride << 3);
>> -
>> -    for (mb = 0; mb < mbs_per_slice; mb++) {
>> -        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
>> -            for (x = 0; x < 8; x++) {
>> -                src_offset = (y << 3) + x;
>> -
>> -                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
>> -                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
>> -
>> -                if (blocks_per_mb > 2) {
>> -                    dst1[dst_offset + x + 8] =
>> -                        CLIP_AND_BIAS(src1[src_offset + 64]);
>> -                    dst2[dst_offset + x + 8] =
>> -                        CLIP_AND_BIAS(src2[src_offset + 64]);
>> -                }
>> -            }
>> -        }
>> -
>> -        src1 += blocks_per_mb << 6;
>> -        src2 += blocks_per_mb << 6;
>> -        dst1 += blocks_per_mb << 2;
>> -        dst2 += blocks_per_mb << 2;
>> -    }
>> -}
>> -
>> -
>>  /**
>>   * Decode a slice plane (luma or chroma).
>>   */
>> @@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const 
>> uint8_t *buf,
>>  {
>>      GetBitContext gb;
>>      DCTELEM *block_ptr;
>> -    int i, blk_num, blocks_per_slice;
>> +    int i, j, mb_num, blocks_per_slice;
>>
>>      blocks_per_slice = mbs_per_slice * blocks_per_mb;
>>
>> @@ -518,20 +470,29 @@ static void decode_slice_plane(ProresContext *ctx, 
>> const uint8_t *buf,
>>      /* inverse quantization, inverse transform and output */
>>      block_ptr = ctx->blocks;
>>
>> -    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 
>> 64) {
>> +    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += 
>> blocks_per_mb * 4) {
>>          /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
>>           * and the input of the inverse transform should be scaled by 2
>>           * in order to avoid rounding errors.
>>           * Due to the fact the existing Libav transforms are incompatible 
>> with
>>           * that input I temporally introduced the coarse solution below... 
>> */
>> -        for (i = 0; i < 64; i++)
>> -            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
>> -
>> -        ctx->dsp.idct(block_ptr);
>> +        for (j = 0; j < blocks_per_mb; j++)
>> +            for (i = 0; i < 64; i++)
>> +                block_ptr[j * 64 + i] = (block_ptr[j * 64 + i] * qmat[i]) 
>> >> 2;
>
> Why is it still here instead of inside DCT (like VP3 does IIRC)?


What about attached? Patch is getting kind of big now (and I need to
redo the asm idct), but this should remove the TODO at the same time.

Ronald

0001-prores-extract-idct-into-its-own-dspcontext-and-merg.patch
Description: Binary data

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/3] prores: extract idct into its own dspcontext and merge with put_pixels.

Reply via email to