Re: [libav-devel] [PATCH] pixlet: Fix architecture-dependent code and values

2017-03-07 Thread Luca Barbato
On 06/03/2017 23:34, Vittorio Giovara wrote:
> Note: some of these values do not fit in int32_t, so I'm adding
> INT64_C() to all of them, if that's ok.

Sure.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] pixlet: Fix architecture-dependent code and values

2017-03-06 Thread Vittorio Giovara
On Thu, Mar 2, 2017 at 4:34 PM, Vittorio Giovara wrote:
> @@ -389,29 +387,29 @@ static void filterfn(int16_t *dest, int16_t *tmp, size_t size, float SCALE)
>  }
>
>  for (i = 0; i < hsize; i++) {
> -value = low [i + 1] * -0.07576144003329376f +
> -low [i + 0] *  0.8586296626673486f  +
> -low [i - 1] * -0.07576144003329376f +
> -high[i + 0] *  0.3535533905932737f  +
> -high[i - 1] *  0.3535533905932737f;
> -dest[i * 2] = av_clipf(value * SCALE, INT16_MIN, INT16_MAX);
> +value = (int64_t) low [i + 1] * -325392907  +
> +(int64_t) low [i + 0] *  3687786320 +
> +(int64_t) low [i - 1] * -325392907  +
> +(int64_t) high[i + 0] *  1518500249 +
> +(int64_t) high[i - 1] *  1518500249;
> +dest[i * 2] = av_clip_int16(((value >> 32) * scale) >> 32);
>  }
>
>  for (i = 0; i < hsize; i++) {
> -value = low [i + 2] * -0.01515228715813062f +
> -low [i + 1] *  0.3687056777514043f  +
> -low [i + 0] *  0.3687056777514043f  +
> -low [i - 1] * -0.01515228715813062f +
> -high[i + 1] *  0.07071067811865475f +
> -high[i + 0] * -0.8485281374238569f  +
> -high[i - 1] *  0.07071067811865475f;
> -dest[i * 2 + 1] = av_clipf(value * SCALE, INT16_MIN, INT16_MAX);
> +value = (int64_t) low [i + 2] * -65078576   +
> +(int64_t) low [i + 1] *  1583578880 +
> +(int64_t) low [i + 0] *  1583578880 +
> +(int64_t) low [i - 1] * -65078576   +
> +(int64_t) high[i + 1] *  303700064  +
> +(int64_t) high[i + 0] * -3644400640 +
> +(int64_t) high[i - 1] *  303700064;
> +dest[i * 2 + 1] = av_clip_int16(((value >> 32) * scale) >> 32);
>  }

Note: some of these values do not fit in int32_t, so I'm adding
INT64_C() to all of them, if that's ok.
-- 
Vittorio
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] pixlet: Fix architecture-dependent code and values

2017-03-03 Thread Luca Barbato
On 02/03/2017 22:34, Vittorio Giovara wrote:
> Constants were using floating-point precision, which caused different
> values to be generated on different architectures. Additionally, on
> big-endian machines the test would output bytes in native order, which
> is different from the one hardcoded in the test.
> 
> So, eradicate the use of floats and use fixed-point (32.32) arithmetic
> everywhere, replacing floating-point constants with precomputed integer
> values, and force the pixel format output to be the same in the FATE test.
> 

Sounds good.

lu

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] pixlet: Fix architecture-dependent code and values

2017-03-02 Thread Vittorio Giovara
Constants were using floating-point precision, which caused different
values to be generated on different architectures. Additionally, on
big-endian machines the test would output bytes in native order, which
is different from the one hardcoded in the test.

So, eradicate the use of floats and use fixed-point (32.32) arithmetic
everywhere, replacing floating-point constants with precomputed integer
values, and force the pixel format output to be the same in the FATE test.

Signed-off-by: Vittorio Giovara 
---
 libavcodec/pixlet.c   | 58 +++
 tests/fate/video.mak  |  2 +-
 tests/ref/fate/pixlet |  2 +-
 3 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/libavcodec/pixlet.c b/libavcodec/pixlet.c
index 3b4ff42365..5f16759e96 100644
--- a/libavcodec/pixlet.c
+++ b/libavcodec/pixlet.c
@@ -39,8 +39,6 @@
 #define H 0
 #define V 1
 
-#define SQR(x) ((x) * (x))
-
 typedef struct SubBand {
 size_t width, height;
 size_t size;
@@ -59,7 +57,7 @@ typedef struct PixletContext {
 
 int16_t *filter[2];
 int16_t *prediction;
-float scaling[4][2][NB_LEVELS];
+int64_t scaling[4][2][NB_LEVELS];
 SubBand band[4][NB_LEVELS * 3 + 1];
 } PixletContext;
 
@@ -364,11 +362,11 @@ static void lowpass_prediction(int16_t *dst, int16_t *pred,
 }
 }
 
-static void filterfn(int16_t *dest, int16_t *tmp, size_t size, float SCALE)
+static void filterfn(int16_t *dest, int16_t *tmp, size_t size, int64_t scale)
 {
 int16_t *low, *high, *ll, *lh, *hl, *hh;
 int hsize, i, j;
-float value;
+int64_t value;
 
 hsize = size >> 1;
 low   = tmp + 4;
@@ -389,29 +387,29 @@ static void filterfn(int16_t *dest, int16_t *tmp, size_t size, float SCALE)
 }
 
 for (i = 0; i < hsize; i++) {
-value = low [i + 1] * -0.07576144003329376f +
-low [i + 0] *  0.8586296626673486f  +
-low [i - 1] * -0.07576144003329376f +
-high[i + 0] *  0.3535533905932737f  +
-high[i - 1] *  0.3535533905932737f;
-dest[i * 2] = av_clipf(value * SCALE, INT16_MIN, INT16_MAX);
+value = (int64_t) low [i + 1] * -325392907  +
+(int64_t) low [i + 0] *  3687786320 +
+(int64_t) low [i - 1] * -325392907  +
+(int64_t) high[i + 0] *  1518500249 +
+(int64_t) high[i - 1] *  1518500249;
+dest[i * 2] = av_clip_int16(((value >> 32) * scale) >> 32);
 }
 
 for (i = 0; i < hsize; i++) {
-value = low [i + 2] * -0.01515228715813062f +
-low [i + 1] *  0.3687056777514043f  +
-low [i + 0] *  0.3687056777514043f  +
-low [i - 1] * -0.01515228715813062f +
-high[i + 1] *  0.07071067811865475f +
-high[i + 0] * -0.8485281374238569f  +
-high[i - 1] *  0.07071067811865475f;
-dest[i * 2 + 1] = av_clipf(value * SCALE, INT16_MIN, INT16_MAX);
+value = (int64_t) low [i + 2] * -65078576   +
+(int64_t) low [i + 1] *  1583578880 +
+(int64_t) low [i + 0] *  1583578880 +
+(int64_t) low [i - 1] * -65078576   +
+(int64_t) high[i + 1] *  303700064  +
+(int64_t) high[i + 0] * -3644400640 +
+(int64_t) high[i - 1] *  303700064;
+dest[i * 2 + 1] = av_clip_int16(((value >> 32) * scale) >> 32);
 }
 }
 
 static void reconstruction(AVCodecContext *avctx, int16_t *dest,
size_t width, size_t height, ptrdiff_t stride,
-   float *scaling_h, float *scaling_v)
+   int64_t *scaling_h, int64_t *scaling_v)
 {
 PixletContext *ctx = avctx->priv_data;
 unsigned scaled_width, scaled_height;
@@ -423,8 +421,8 @@ static void reconstruction(AVCodecContext *avctx, int16_t *dest,
 tmp   = ctx->filter[0];
 
 for (i = 0; i < NB_LEVELS; i++) {
-float scale_v = scaling_v[i];
-float scale_h = scaling_h[i];
+int64_t scale_v = scaling_v[i];
+int64_t scale_h = scaling_h[i];
 scaled_width  <<= 1;
 scaled_height <<= 1;
 
@@ -457,12 +455,18 @@ static void postprocess_luma(AVFrame *frame, size_t w, size_t h, int depth)
 uint16_t *dsty = (uint16_t *)frame->data[0];
 int16_t *srcy  = (int16_t *)frame->data[0];
 ptrdiff_t stridey = frame->linesize[0] / 2;
-const float factor = 1.0f / ((1 << depth) - 1);
 int i, j;
 
 for (j = 0; j < h; j++) {
-for (i = 0; i < w; i++)
-dsty[i] = SQR(FFMAX(srcy[i], 0) * factor) * 65535;
+for (i = 0; i < w; i++) {
+if (srcy[i] <= 0)
+dsty[i] = 0;
+else if (srcy[i] > ((1 << depth) - 1))
+dsty[i] = 65535;
+else
+dsty[i] = ((int64_t)srcy[i] * srcy[i] * 65535) /
+  ((1 << depth) - 1) / ((1 << depth) - 1);
+