[FFmpeg-devel] [PATCH] avcodec: change type of ff_square_tab from uint32_t to uint16_t

2015-02-17 Thread Zhaoxiu Zeng
From bf2964c07fde48c633ca4d8276282010e7c7f084 Mon Sep 17 00:00:00 2001
From: zhaoxiu.zeng <zhaoxiu.z...@gmail.com>
Date: Tue, 17 Feb 2015 16:03:47 +0800
Subject: [PATCH 1/1] avcodec: change type of ff_square_tab from uint32_t to
 uint16_t

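uint16_t is big enough for every element except the first one, and the
first element is never used: the table is indexed through
ff_square_tab + 256 with the difference of two uint8_t values, which is
never -256.
This also turns the nsse functions into a macro and uses ff_square_tab
where possible.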

Signed-off-by: zhaoxiu.zeng <zhaoxiu.z...@gmail.com>
---
 libavcodec/me_cmp.c| 94 ++
 libavcodec/me_cmp.h|  3 +-
 libavcodec/mpegvideo_enc.c |  2 +-
 libavcodec/snowenc.c   |  2 +-
 4 files changed, 41 insertions(+), 60 deletions(-)

diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c
index d4213d2..a7a90b7 100644
--- a/libavcodec/me_cmp.c
+++ b/libavcodec/me_cmp.c
@@ -29,13 +29,13 @@
 #include "mpegvideo.h"
 #include "config.h"

-uint32_t ff_square_tab[512] = { 0, };
+uint16_t ff_square_tab[512] = { 0, };

 static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
   ptrdiff_t stride, int h)
 {
 int s = 0, i;
-uint32_t *sq = ff_square_tab + 256;
+uint16_t *sq = ff_square_tab + 256;

 for (i = 0; i < h; i++) {
 s+= sq[pix1[0] - pix2[0]];
@@ -52,7 +52,7 @@ static int sse8_c(MpegEncContext *v, uint8_t *pix1,
uint8_t *pix2,
   ptrdiff_t stride, int h)
 {
 int s = 0, i;
-uint32_t *sq = ff_square_tab + 256;
+uint16_t *sq = ff_square_tab + 256;

 for (i = 0; i < h; i++) {
 s+= sq[pix1[0] - pix2[0]];
@@ -73,7 +73,7 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1,
uint8_t *pix2,
ptrdiff_t stride, int h)
 {
 int s = 0, i;
-uint32_t *sq = ff_square_tab + 256;
+uint16_t *sq = ff_square_tab + 256;

 for (i = 0; i < h; i++) {
 s += sq[pix1[0]  - pix2[0]];
@@ -311,55 +311,34 @@ static int pix_abs8_xy2_c(MpegEncContext *v,
uint8_t *pix1, uint8_t *pix2,
 return s;
 }

-static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
-ptrdiff_t stride, int h)
-{
-int score1 = 0, score2 = 0, x, y;
-
-for (y = 0; y < h; y++) {
-for (x = 0; x < 16; x++)
-score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
-if (y + 1 < h) {
-for (x = 0; x < 15; x++)
-score2 += FFABS(s1[x] - s1[x + stride] -
-s1[x + 1] + s1[x + stride + 1]) -
-  FFABS(s2[x] - s2[x + stride] -
-s2[x + 1] + s2[x + stride + 1]);
-}
-s1 += stride;
-s2 += stride;
-}
-
-if (c)
-return score1 + FFABS(score2) * c->avctx->nsse_weight;
-else
-return score1 + FFABS(score2) * 8;
-}
-
-static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
-   ptrdiff_t stride, int h)
-{
-int score1 = 0, score2 = 0, x, y;
-
-for (y = 0; y < h; y++) {
-for (x = 0; x < 8; x++)
-score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
-if (y + 1 < h) {
-for (x = 0; x < 7; x++)
-score2 += FFABS(s1[x] - s1[x + stride] -
-s1[x + 1] + s1[x + stride + 1]) -
-  FFABS(s2[x] - s2[x + stride] -
-s2[x + 1] + s2[x + stride + 1]);
-}
-s1 += stride;
-s2 += stride;
-}
-
-if (c)
-return score1 + FFABS(score2) * c->avctx->nsse_weight;
-else
-return score1 + FFABS(score2) * 8;
-}
+#define NSSE(size) \
+static int nsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \
+  ptrdiff_t stride, int h) \
+{  \
+int score1 = 0, score2 = 0, x, y;  \
+uint16_t *sq = ff_square_tab + 256;\
+   \
+for (y = 0; y < h; y++) {  \
+for (x = 0; x < size; x++) \
+score1 += sq[s1[x] - s2[x]];   \
+if (y + 1 < h) {   \
+for (x = 0; x < size - 1; x++) \
+score2 += FFABS(s1[x] - s1[x + stride] -   \
+s1[x + 1] + s1[x + stride + 1]) -  \
+  FFABS(s2[x] - s2[x + stride] -   \
+s2[x + 1] + s2[x + stride + 1]);   \
+}  \
+s1 += stride;  \
+s2 += stride;

Re: [FFmpeg-devel] [PATCH] avcodec: change type of ff_square_tab from uint32_t to uint16_t

2015-02-17 Thread Clément Bœsch
On Tue, Feb 17, 2015 at 04:31:07PM +0800, Zhaoxiu Zeng wrote:
 From bf2964c07fde48c633ca4d8276282010e7c7f084 Mon Sep 17 00:00:00 2001
 From: zhaoxiu.zeng zhaoxiu.z...@gmail.com
 Date: Tue, 17 Feb 2015 16:03:47 +0800
 Subject: [PATCH 1/1] avcodec: change type of ff_square_tab from uint32_t to
  uint16_t
 
 uint16_t is big enough except the first element, but the first element
 is never used.
 This also macroize nsse function, and use ff_square_tab when possible.
 
 Signed-off-by: zhaoxiu.zeng zhaoxiu.z...@gmail.com
 ---
  libavcodec/me_cmp.c| 94 
 ++
  libavcodec/me_cmp.h|  3 +-
  libavcodec/mpegvideo_enc.c |  2 +-
  libavcodec/snowenc.c   |  2 +-
  4 files changed, 41 insertions(+), 60 deletions(-)
 
[...]
 -static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
 -ptrdiff_t stride, int h)
 -{
 -int score1 = 0, score2 = 0, x, y;
 -
 -for (y = 0; y < h; y++) {
 -for (x = 0; x < 16; x++)
 -score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
 -if (y + 1 < h) {
 -for (x = 0; x < 15; x++)
 -score2 += FFABS(s1[x] - s1[x + stride] -
 -s1[x + 1] + s1[x + stride + 1]) -
 -  FFABS(s2[x] - s2[x + stride] -
 -s2[x + 1] + s2[x + stride + 1]);
 -}
 -s1 += stride;
 -s2 += stride;
 -}
 -
 -if (c)
 -return score1 + FFABS(score2) * c->avctx->nsse_weight;
 -else
 -return score1 + FFABS(score2) * 8;
 -}
 -
 -static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
 -   ptrdiff_t stride, int h)
 -{
 -int score1 = 0, score2 = 0, x, y;
 -
 -for (y = 0; y < h; y++) {
 -for (x = 0; x < 8; x++)
 -score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
 -if (y + 1 < h) {
 -for (x = 0; x < 7; x++)
 -score2 += FFABS(s1[x] - s1[x + stride] -
 -s1[x + 1] + s1[x + stride + 1]) -
 -  FFABS(s2[x] - s2[x + stride] -
 -s2[x + 1] + s2[x + stride + 1]);
 -}
 -s1 += stride;
 -s2 += stride;
 -}
 -
 -if (c)
 -return score1 + FFABS(score2) * c->avctx->nsse_weight;
 -else
 -return score1 + FFABS(score2) * 8;
 -}
 +#define NSSE(size) \
 +static int nsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \
 +  ptrdiff_t stride, int h) \
 +{  \
 +int score1 = 0, score2 = 0, x, y;  \
 +uint16_t *sq = ff_square_tab + 256;\
 +   \
 +for (y = 0; y < h; y++) {  \
 +for (x = 0; x < size; x++) \
 +score1 += sq[s1[x] - s2[x]];   \
 +if (y + 1 < h) {   \
 +for (x = 0; x < size - 1; x++) \
 +score2 += FFABS(s1[x] - s1[x + stride] -   \
 +s1[x + 1] + s1[x + stride + 1]) -  \
 +  FFABS(s2[x] - s2[x + stride] -   \
 +s2[x + 1] + s2[x + stride + 1]);   \
 +}  \
 +s1 += stride;  \
 +s2 += stride;  \
 +}  \
 +   \
 +if (c) \
 +return score1 + FFABS(score2) * c->avctx->nsse_weight; \
 +else   \
 +return score1 + FFABS(score2) * 8; \
 +}
 +NSSE(8)
 +NSSE(16)
 

You can do better than this:

static inline int nsse_base(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
ptrdiff_t stride, int h, const int size)
{
int score1 = 0, score2 = 0, x, y;

for (y = 0; y < h; y++) {
for (x = 0; x < size; x++)
score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
if (y + 1 < h) {
for (x = 0; x < size - 1; x++)
score2 += FFABS(s1[x] - s1[x + stride] -
s1[x + 1] + s1[x + stride + 1]) -
  FFABS(s2[x] - s2[x + stride] -
  

Re: [FFmpeg-devel] [PATCH] avcodec: change type of ff_square_tab from uint32_t to uint16_t

2015-02-17 Thread Clément Bœsch
On Tue, Feb 17, 2015 at 10:03:29AM +0100, Clément Bœsch wrote:
 On Tue, Feb 17, 2015 at 04:31:07PM +0800, Zhaoxiu Zeng wrote:
  From bf2964c07fde48c633ca4d8276282010e7c7f084 Mon Sep 17 00:00:00 2001
  From: zhaoxiu.zeng zhaoxiu.z...@gmail.com
  Date: Tue, 17 Feb 2015 16:03:47 +0800
  Subject: [PATCH 1/1] avcodec: change type of ff_square_tab from uint32_t to
   uint16_t
  
  uint16_t is big enough except the first element, but the first element
  is never used.
  This also macroize nsse function, and use ff_square_tab when possible.
  
  Signed-off-by: zhaoxiu.zeng zhaoxiu.z...@gmail.com
  ---
   libavcodec/me_cmp.c| 94 
  ++
   libavcodec/me_cmp.h|  3 +-
   libavcodec/mpegvideo_enc.c |  2 +-
   libavcodec/snowenc.c   |  2 +-
   4 files changed, 41 insertions(+), 60 deletions(-)
  
 [...]
  -static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
  -ptrdiff_t stride, int h)
  -{
  -int score1 = 0, score2 = 0, x, y;
  -
  -for (y = 0; y < h; y++) {
  -for (x = 0; x < 16; x++)
  -score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
  -if (y + 1 < h) {
  -for (x = 0; x < 15; x++)
  -score2 += FFABS(s1[x] - s1[x + stride] -
  -s1[x + 1] + s1[x + stride + 1]) -
  -  FFABS(s2[x] - s2[x + stride] -
  -s2[x + 1] + s2[x + stride + 1]);
  -}
  -s1 += stride;
  -s2 += stride;
  -}
  -
  -if (c)
  -return score1 + FFABS(score2) * c->avctx->nsse_weight;
  -else
  -return score1 + FFABS(score2) * 8;
  -}
  -
  -static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
  -   ptrdiff_t stride, int h)
  -{
  -int score1 = 0, score2 = 0, x, y;
  -
  -for (y = 0; y  h; y++) {
  -for (x = 0; x  8; x++)
  -score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
  -if (y + 1  h) {
  -for (x = 0; x  7; x++)
  -score2 += FFABS(s1[x] - s1[x + stride] -
  -s1[x + 1] + s1[x + stride + 1]) -
  -  FFABS(s2[x] - s2[x + stride] -
  -s2[x + 1] + s2[x + stride + 1]);
  -}
  -s1 += stride;
  -s2 += stride;
  -}
  -
  -if (c)
  -return score1 + FFABS(score2) * c->avctx->nsse_weight;
  -else
  -return score1 + FFABS(score2) * 8;
  -}
  +#define NSSE(size) 
  \
  +static int nsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, 
  \
  +  ptrdiff_t stride, int h) 
  \
  +{  
  \
  +int score1 = 0, score2 = 0, x, y;  
  \
  +uint16_t *sq = ff_square_tab + 256;
  \
  +   
  \
  +for (y = 0; y < h; y++) {  
  \
  +for (x = 0; x < size; x++) 
  \
  +score1 += sq[s1[x] - s2[x]];   
  \
  +if (y + 1 < h) {   
  \
  +for (x = 0; x < size - 1; x++) 
  \
  +score2 += FFABS(s1[x] - s1[x + stride] -   
  \
  +s1[x + 1] + s1[x + stride + 1]) -  
  \
  +  FFABS(s2[x] - s2[x + stride] -   
  \
  +s2[x + 1] + s2[x + stride + 1]);   
  \
  +}  
  \
  +s1 += stride;  
  \
  +s2 += stride;  
  \
  +}  
  \
  +   
  \
  +if (c) 
  \
  +return score1 + FFABS(score2) * c->avctx->nsse_weight; 
  \
  +else   
  \
  +return score1 + FFABS(score2) * 8; 
  \
  +}
  +NSSE(8)
  +NSSE(16)
  
 
 You can do better than this:
 
 static inline int nsse_base(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
 ptrdiff_t stride, int h, const int size)
 {
 int score1 = 0, score2 = 0, x, y;
 
 for (y = 0; y < h; y++) {
 for (x = 0; x < size; x++)
 score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);