On Tue, Feb 17, 2015 at 10:03:29AM +0100, Clément Bœsch wrote:
> On Tue, Feb 17, 2015 at 04:31:07PM +0800, Zhaoxiu Zeng wrote:
> > From bf2964c07fde48c633ca4d8276282010e7c7f084 Mon Sep 17 00:00:00 2001
> > From: "zhaoxiu.zeng" <zhaoxiu.z...@gmail.com>
> > Date: Tue, 17 Feb 2015 16:03:47 +0800
> > Subject: [PATCH 1/1] avcodec: change type of ff_square_tab from uint32_t to
> >  uint16_t
> > 
> > uint16_t is big enough for every element except the first, but the
> > first element is never used.
> > This also macroizes the nsse functions and uses ff_square_tab where possible.
> > 
> > Signed-off-by: zhaoxiu.zeng <zhaoxiu.z...@gmail.com>
> > ---
> >  libavcodec/me_cmp.c        | 94 ++++++++++++++++++----------------------
> >  libavcodec/me_cmp.h        |  3 +-
> >  libavcodec/mpegvideo_enc.c |  2 +-
> >  libavcodec/snowenc.c       |  2 +-
> >  4 files changed, 41 insertions(+), 60 deletions(-)
> > 
> [...]
> > -static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
> > -                    ptrdiff_t stride, int h)
> > -{
> > -    int score1 = 0, score2 = 0, x, y;
> > -
> > -    for (y = 0; y < h; y++) {
> > -        for (x = 0; x < 16; x++)
> > -            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
> > -        if (y + 1 < h) {
> > -            for (x = 0; x < 15; x++)
> > -                score2 += FFABS(s1[x]     - s1[x + stride] -
> > -                                s1[x + 1] + s1[x + stride + 1]) -
> > -                          FFABS(s2[x]     - s2[x + stride] -
> > -                                s2[x + 1] + s2[x + stride + 1]);
> > -        }
> > -        s1 += stride;
> > -        s2 += stride;
> > -    }
> > -
> > -    if (c)
> > -        return score1 + FFABS(score2) * c->avctx->nsse_weight;
> > -    else
> > -        return score1 + FFABS(score2) * 8;
> > -}
> > -
> > -static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
> > -                   ptrdiff_t stride, int h)
> > -{
> > -    int score1 = 0, score2 = 0, x, y;
> > -
> > -    for (y = 0; y < h; y++) {
> > -        for (x = 0; x < 8; x++)
> > -            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
> > -        if (y + 1 < h) {
> > -            for (x = 0; x < 7; x++)
> > -                score2 += FFABS(s1[x]     - s1[x + stride] -
> > -                                s1[x + 1] + s1[x + stride + 1]) -
> > -                          FFABS(s2[x]     - s2[x + stride] -
> > -                                s2[x + 1] + s2[x + stride + 1]);
> > -        }
> > -        s1 += stride;
> > -        s2 += stride;
> > -    }
> > -
> > -    if (c)
> > -        return score1 + FFABS(score2) * c->avctx->nsse_weight;
> > -    else
> > -        return score1 + FFABS(score2) * 8;
> > -}
> > +#define NSSE(size)                                                         \
> > +static int nsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \
> > +                              ptrdiff_t stride, int h)                     \
> > +{                                                                          \
> > +    int score1 = 0, score2 = 0, x, y;                                      \
> > +    uint16_t *sq = ff_square_tab + 256;                                    \
> > +                                                                           \
> > +    for (y = 0; y < h; y++) {                                              \
> > +        for (x = 0; x < size; x++)                                         \
> > +            score1 += sq[s1[x] - s2[x]];                                   \
> > +        if (y + 1 < h) {                                                   \
> > +            for (x = 0; x < size - 1; x++)                                 \
> > +                score2 += FFABS(s1[x]     - s1[x + stride] -               \
> > +                                s1[x + 1] + s1[x + stride + 1]) -          \
> > +                          FFABS(s2[x]     - s2[x + stride] -               \
> > +                                s2[x + 1] + s2[x + stride + 1]);           \
> > +        }                                                                  \
> > +        s1 += stride;                                                      \
> > +        s2 += stride;                                                      \
> > +    }                                                                      \
> > +                                                                           \
> > +    if (c)                                                                 \
> > +        return score1 + FFABS(score2) * c->avctx->nsse_weight;             \
> > +    else                                                                   \
> > +        return score1 + FFABS(score2) * 8;                                 \
> > +}
> > +NSSE(8)
> > +NSSE(16)
> > 
> 
> You can do better than this:
> 
> static inline int nsse_base(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
>                             ptrdiff_t stride, int h, const int size)
> {
>     int score1 = 0, score2 = 0, x, y;
> 
>     for (y = 0; y < h; y++) {
>         for (x = 0; x < size; x++)
>             score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
>         if (y + 1 < h) {
>             for (x = 0; x < size - 1; x++)
>                 score2 += FFABS(s1[x]     - s1[x + stride] -
>                                 s1[x + 1] + s1[x + stride + 1]) -
>                           FFABS(s2[x]     - s2[x + stride] -
>                                 s2[x + 1] + s2[x + stride + 1]);
>         }
>         s1 += stride;
>         s2 += stride;
>     }
> 
>     if (c)
>         return score1 + FFABS(score2) * c->avctx->nsse_weight;
>     else
>         return score1 + FFABS(score2) * 8;
> }
> 
> #define DEFINE_NSSE_FUNC(s)                                         \
> static nsse_##s##_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,    \
>                     ptrdiff_t stride, int h)                        \

Add the missing `int` return type here

> {                                                                   \
>     nsse_base(c, s1, s2, stride, h, s);                             \

and add a `return` here, so the wrapper passes the nsse_base() result back.

Anyway, you get the idea.

[...]

-- 
Clément B.

Attachment: pgpm8H3f4TmY1.pgp
Description: PGP signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to