On Sat, Apr 19, 2014 at 06:55:04PM -0400, Derek Buitenhuis wrote:
> From: Kostya Shishkov <[email protected]>
> 
> Signed-off-by: Derek Buitenhuis <[email protected]>
> ---
> The static inline version is just as fast as the macro templated one,
> and far less ugly. I tested.

How does one test ugliness?

> ---
>  configure        |  2 +-
>  libavcodec/fic.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----------
>  2 files changed, 53 insertions(+), 12 deletions(-)
> 
> diff --git a/configure b/configure
> index 63a9e72..65a9cf8 100755
> --- a/configure
> +++ b/configure
> @@ -1737,7 +1737,7 @@ ffv1_decoder_select="golomb rangecoder"
>  ffv1_encoder_select="rangecoder"
>  ffvhuff_decoder_select="huffyuv_decoder"
>  ffvhuff_encoder_select="huffyuv_encoder"
> -fic_decoder_select="dsputil golomb"
> +fic_decoder_select="golomb"
>  flac_decoder_select="golomb"
>  flac_encoder_select="dsputil golomb lpc"
>  flashsv_decoder_deps="zlib"
> diff --git a/libavcodec/fic.c b/libavcodec/fic.c
> index df03437..2d9da65 100644
> --- a/libavcodec/fic.c
> +++ b/libavcodec/fic.c
> @@ -24,7 +24,6 @@
>  #include "libavutil/common.h"
>  #include "avcodec.h"
>  #include "internal.h"
> -#include "dsputil.h"
>  #include "get_bits.h"
>  #include "golomb.h"
>  
> @@ -40,9 +39,6 @@ typedef struct FICContext {
>      AVCodecContext *avctx;
>      AVFrame *frame;
>  
> -    DSPContext dsp;
> -    ScanTable scantable;
> -
>      FICThreadContext *slice_data;
>      int slice_data_size;
>  
> @@ -80,6 +76,55 @@ static const uint8_t fic_header[7] = { 0, 0, 1, 'F', 'I', 'C', 'V' };
>  
>  #define FIC_HEADER_SIZE 27
>  
> +static av_always_inline void fic_idct(int16_t *blk, int step, int shift)
> +{
> +    const int t0 =  27246 * blk[3 * step] + 18405 * blk[5 * step];
> +    const int t1 =  27246 * blk[5 * step] - 18405 * blk[3 * step];
> +    const int t2 =   6393 * blk[7 * step] + 32139 * blk[1 * step];
> +    const int t3 =   6393 * blk[1 * step] - 32139 * blk[7 * step];
> +    const int t4 = 5793 * (t2 + t0 + 0x800 >> 12);
> +    const int t5 = 5793 * (t3 + t1 + 0x800 >> 12);
> +    const int t6 = t2 - t0;
> +    const int t7 = t3 - t1;
> +    const int t8 =  17734 * blk[2 * step] - 42813 * blk[6 * step];
> +    const int t9 =  17734 * blk[6 * step] + 42814 * blk[2 * step];
> +    const int tA = (blk[0 * step] - blk[4 * step] << 15) + (1 << shift - 1);
> +    const int tB = (blk[0 * step] + blk[4 * step] << 15) + (1 << shift - 1);
> +    blk[0 * step] = (  t4       + t9 + tB) >> shift;
> +    blk[1 * step] = (  t6 + t7  + t8 + tA) >> shift;
> +    blk[2 * step] = (  t6 - t7  - t8 + tA) >> shift;
> +    blk[3 * step] = (  t5       - t9 + tB) >> shift;
> +    blk[4 * step] = ( -t5       - t9 + tB) >> shift;
> +    blk[5 * step] = (-(t6 - t7) - t8 + tA) >> shift;
> +    blk[6 * step] = (-(t6 + t7) + t8 + tA) >> shift;
> +    blk[7 * step] = ( -t4       + t9 + tB) >> shift;
> +}
> +
> +static void fic_idct_put(uint8_t *dst, int stride, int16_t *block)
> +{
> +    int i, j;
> +    int16_t *ptr;
> +
> +    ptr = block;
> +    for (i = 0; i < 8; i++) {
> +        fic_idct(ptr, 8, 13);
> +        ptr++;
> +    }
> +
> +    ptr = block;
> +    for (i = 0; i < 8; i++) {
> +        fic_idct(ptr, 1, 20);
> +        ptr += 8;
> +    }
> +
> +    ptr = block;
> +    for (j = 0; j < 8; j++) {
> +        for (i = 0; i < 8; i++)
> +            dst[i] = av_clip_uint8(ptr[i]);
> +        dst += stride;
> +        ptr += 8;
> +    }
> +}
>  static int fic_decode_block(FICContext *ctx, GetBitContext *gb,
>                              uint8_t *dst, int stride, int16_t *block)
>  {
> @@ -94,16 +139,16 @@ static int fic_decode_block(FICContext *ctx, GetBitContext *gb,
>          return 0;
>      }
>  
> -    ctx->dsp.clear_block(block);
> +    memset(block, 0, sizeof(int16_t) * 64);

sizeof(*block) please
  
>      num_coeff = get_bits(gb, 7);
>      if (num_coeff > 64)
>          return AVERROR_INVALIDDATA;
>  
>      for (i = 0; i < num_coeff; i++)
> -        block[ctx->scantable.permutated[i]] = get_se_golomb(gb) * ctx->qmat[i];
> +        block[ff_zigzag_direct[i]] = get_se_golomb(gb) * ctx->qmat[i];
>  
> -    ctx->dsp.idct_put(dst, stride, block);
> +    fic_idct_put(dst, stride, block);
>  
>      return 0;
>  }
> @@ -283,10 +328,6 @@ static av_cold int fic_decode_init(AVCodecContext *avctx)
>      if (!ctx->frame)
>          return AVERROR(ENOMEM);
>  
> -    ff_dsputil_init(&ctx->dsp, avctx);
> -
> -    ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, ff_zigzag_direct);
> -
>      return 0;
>  }
>  
> -- 

other than that LGTM
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to