Re: [FFmpeg-devel] [PATCH v5 4/4] swscale/output: add rgbaf32 output support

2022-12-04 Thread Mark Reid
On Sun, Dec 4, 2022 at 4:05 PM Michael Niedermayer 
wrote:

> On Wed, Nov 23, 2022 at 11:35:40AM -0800, mindm...@gmail.com wrote:
> > From: Mark Reid 
> >
> > ---
> >  libswscale/output.c  | 92 
> >  libswscale/swscale_unscaled.c|  4 +-
> >  libswscale/tests/floatimg_cmp.c  |  4 +-
> >  libswscale/utils.c   | 16 +++--
> >  libswscale/yuv2rgb.c |  2 +
> >  tests/ref/fate/filter-pixdesc-rgbaf32be  |  1 +
> >  tests/ref/fate/filter-pixdesc-rgbaf32le  |  1 +
> >  tests/ref/fate/filter-pixdesc-rgbf32be   |  1 +
> >  tests/ref/fate/filter-pixdesc-rgbf32le   |  1 +
> >  tests/ref/fate/filter-pixfmts-copy   |  4 ++
> >  tests/ref/fate/filter-pixfmts-crop   |  4 ++
> >  tests/ref/fate/filter-pixfmts-field  |  4 ++
> >  tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
> >  tests/ref/fate/filter-pixfmts-hflip  |  4 ++
> >  tests/ref/fate/filter-pixfmts-il |  4 ++
> >  tests/ref/fate/filter-pixfmts-null   |  4 ++
> >  tests/ref/fate/filter-pixfmts-scale  |  4 ++
> >  tests/ref/fate/filter-pixfmts-transpose  |  4 ++
> >  tests/ref/fate/filter-pixfmts-vflip  |  4 ++
> >  tests/ref/fate/sws-floatimg-cmp  | 16 +
> >  20 files changed, 170 insertions(+), 8 deletions(-)
> >  create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
> >  create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
> >  create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
> >  create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le
> >
> > diff --git a/libswscale/output.c b/libswscale/output.c
> > index 5c85bff971..1d86a244f9 100644
> > --- a/libswscale/output.c
> > +++ b/libswscale/output.c
> > @@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
> >  }
> >  }
> >
> > +static void
> > +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
> > +const int16_t **lumSrcx, int lumFilterSize,
> > +const int16_t *chrFilter, const int16_t **chrUSrcx,
> > +const int16_t **chrVSrcx, int chrFilterSize,
> > +const int16_t **alpSrcx, uint8_t *dest,
> > +int dstW, int y)
> > +{
> > +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
> > +int i;
> > +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
> > +int hasAlpha = alpha && alpSrcx;
> > +int pixelStep = alpha ? 4 : 3;
> > +uint32_t *dest32 = (uint32_t*)dest;
> > +const int32_t **lumSrc  = (const int32_t**)lumSrcx;
> > +const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
> > +const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
> > +const int32_t **alpSrc  = (const int32_t**)alpSrcx;
> > +static const float float_mult = 1.0f / 65535.0f;
> > +uint32_t a = av_float2int(1.0f);
> > +
> > +for (i = 0; i < dstW; i++) {
> > +int j;
> > +int Y = -0x4000;
> > +int U = -(128 << 23);
> > +int V = -(128 << 23);
> > +int R, G, B, A;
> > +
> > +for (j = 0; j < lumFilterSize; j++)
> > +Y += lumSrc[j][i] * (unsigned)lumFilter[j];
> > +
> > +for (j = 0; j < chrFilterSize; j++) {
> > +U += chrUSrc[j][i] * (unsigned)chrFilter[j];
> > +V += chrVSrc[j][i] * (unsigned)chrFilter[j];
> > +}
> > +
> > +Y >>= 14;
> > +Y += 0x1;
> > +U >>= 14;
> > +V >>= 14;
> > +
> > +if (hasAlpha) {
> > +A = -0x4000;
> > +
> > +for (j = 0; j < lumFilterSize; j++)
> > +A += alpSrc[j][i] * (unsigned)lumFilter[j];
> > +
> > +A >>= 1;
> > +A += 0x20002000;
> > +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14));
> > +}
> > +
> > +Y -= c->yuv2rgb_y_offset;
> > +Y *= c->yuv2rgb_y_coeff;
> > +Y += (1 << 13) - (1 << 29);
> > +R = V * c->yuv2rgb_v2r_coeff;
> > +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> > +B =U * c->yuv2rgb_u2b_coeff;
> > +
> > +R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16);
> > +G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16);
> > +B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16);
> > +
> > +dest32[0] = av_float2int(float_mult * (float)R);
> > +dest32[1] = av_float2int(float_mult * (float)G);
> > +dest32[2] = av_float2int(float_mult * (float)B);
> > +if (alpha)
> > +dest32[3] = a;
>
> Why is this using uint32_t with av_float2int() rather than writing floats directly?
>
>
It's this way because it matches the planar f32 version; I will change
both.


>
>
> > +
> > +dest32 += pixelStep;
> > +}
> > +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
> > +dest32 = (uint32_t*)dest;
> > +for (i = 0; i < dstW; i++) {
> > +dest32

Re: [FFmpeg-devel] [PATCH v5 4/4] swscale/output: add rgbaf32 output support

2022-12-04 Thread Michael Niedermayer
On Wed, Nov 23, 2022 at 11:35:40AM -0800, mindm...@gmail.com wrote:
> From: Mark Reid 
> 
> ---
>  libswscale/output.c  | 92 
>  libswscale/swscale_unscaled.c|  4 +-
>  libswscale/tests/floatimg_cmp.c  |  4 +-
>  libswscale/utils.c   | 16 +++--
>  libswscale/yuv2rgb.c |  2 +
>  tests/ref/fate/filter-pixdesc-rgbaf32be  |  1 +
>  tests/ref/fate/filter-pixdesc-rgbaf32le  |  1 +
>  tests/ref/fate/filter-pixdesc-rgbf32be   |  1 +
>  tests/ref/fate/filter-pixdesc-rgbf32le   |  1 +
>  tests/ref/fate/filter-pixfmts-copy   |  4 ++
>  tests/ref/fate/filter-pixfmts-crop   |  4 ++
>  tests/ref/fate/filter-pixfmts-field  |  4 ++
>  tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
>  tests/ref/fate/filter-pixfmts-hflip  |  4 ++
>  tests/ref/fate/filter-pixfmts-il |  4 ++
>  tests/ref/fate/filter-pixfmts-null   |  4 ++
>  tests/ref/fate/filter-pixfmts-scale  |  4 ++
>  tests/ref/fate/filter-pixfmts-transpose  |  4 ++
>  tests/ref/fate/filter-pixfmts-vflip  |  4 ++
>  tests/ref/fate/sws-floatimg-cmp  | 16 +
>  20 files changed, 170 insertions(+), 8 deletions(-)
>  create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
>  create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
>  create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
>  create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le
> 
> diff --git a/libswscale/output.c b/libswscale/output.c
> index 5c85bff971..1d86a244f9 100644
> --- a/libswscale/output.c
> +++ b/libswscale/output.c
> @@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
>  }
>  }
>  
> +static void
> +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
> +const int16_t **lumSrcx, int lumFilterSize,
> +const int16_t *chrFilter, const int16_t **chrUSrcx,
> +const int16_t **chrVSrcx, int chrFilterSize,
> +const int16_t **alpSrcx, uint8_t *dest,
> +int dstW, int y)
> +{
> +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
> +int i;
> +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
> +int hasAlpha = alpha && alpSrcx;
> +int pixelStep = alpha ? 4 : 3;
> +uint32_t *dest32 = (uint32_t*)dest;
> +const int32_t **lumSrc  = (const int32_t**)lumSrcx;
> +const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
> +const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
> +const int32_t **alpSrc  = (const int32_t**)alpSrcx;
> +static const float float_mult = 1.0f / 65535.0f;
> +uint32_t a = av_float2int(1.0f);
> +
> +for (i = 0; i < dstW; i++) {
> +int j;
> +int Y = -0x4000;
> +int U = -(128 << 23);
> +int V = -(128 << 23);
> +int R, G, B, A;
> +
> +for (j = 0; j < lumFilterSize; j++)
> +Y += lumSrc[j][i] * (unsigned)lumFilter[j];
> +
> +for (j = 0; j < chrFilterSize; j++) {
> +U += chrUSrc[j][i] * (unsigned)chrFilter[j];
> +V += chrVSrc[j][i] * (unsigned)chrFilter[j];
> +}
> +
> +Y >>= 14;
> +Y += 0x1;
> +U >>= 14;
> +V >>= 14;
> +
> +if (hasAlpha) {
> +A = -0x4000;
> +
> +for (j = 0; j < lumFilterSize; j++)
> +A += alpSrc[j][i] * (unsigned)lumFilter[j];
> +
> +A >>= 1;
> +A += 0x20002000;
> +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14));
> +}
> +
> +Y -= c->yuv2rgb_y_offset;
> +Y *= c->yuv2rgb_y_coeff;
> +Y += (1 << 13) - (1 << 29);
> +R = V * c->yuv2rgb_v2r_coeff;
> +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> +B =U * c->yuv2rgb_u2b_coeff;
> +
> +R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16);
> +G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16);
> +B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16);
> +
> +dest32[0] = av_float2int(float_mult * (float)R);
> +dest32[1] = av_float2int(float_mult * (float)G);
> +dest32[2] = av_float2int(float_mult * (float)B);
> +if (alpha)
> +dest32[3] = a;

Why is this using uint32_t with av_float2int() rather than writing floats directly?



> +
> +dest32 += pixelStep;
> +}
> +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
> +dest32 = (uint32_t*)dest;
> +for (i = 0; i < dstW; i++) {
> +dest32[0] = av_bswap32(dest32[0]);
> +dest32[1] = av_bswap32(dest32[1]);
> +dest32[2] = av_bswap32(dest32[2]);
> +if (alpha)
> +dest32[3] = av_bswap32(dest32[3]);
> +
> +dest32 += pixelStep;
> +}
> +}

The code in bswapdsp seems more efficient; ideally it should be shared and
used here.

thx

[...]
-- 
M

[FFmpeg-devel] [PATCH v5 4/4] swscale/output: add rgbaf32 output support

2022-11-23 Thread mindmark
From: Mark Reid 

---
 libswscale/output.c  | 92 
 libswscale/swscale_unscaled.c|  4 +-
 libswscale/tests/floatimg_cmp.c  |  4 +-
 libswscale/utils.c   | 16 +++--
 libswscale/yuv2rgb.c |  2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |  1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 tests/ref/fate/sws-floatimg-cmp  | 16 +
 20 files changed, 170 insertions(+), 8 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..1d86a244f9 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
 }
 }
 
+static void
+yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+const int16_t **lumSrcx, int lumFilterSize,
+const int16_t *chrFilter, const int16_t **chrUSrcx,
+const int16_t **chrVSrcx, int chrFilterSize,
+const int16_t **alpSrcx, uint8_t *dest,
+int dstW, int y)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+int i;
+int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
+int hasAlpha = alpha && alpSrcx;
+int pixelStep = alpha ? 4 : 3;
+uint32_t *dest32 = (uint32_t*)dest;
+const int32_t **lumSrc  = (const int32_t**)lumSrcx;
+const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+static const float float_mult = 1.0f / 65535.0f;
+uint32_t a = av_float2int(1.0f);
+
+for (i = 0; i < dstW; i++) {
+int j;
+int Y = -0x4000;
+int U = -(128 << 23);
+int V = -(128 << 23);
+int R, G, B, A;
+
+for (j = 0; j < lumFilterSize; j++)
+Y += lumSrc[j][i] * (unsigned)lumFilter[j];
+
+for (j = 0; j < chrFilterSize; j++) {
+U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+}
+
+Y >>= 14;
+Y += 0x1;
+U >>= 14;
+V >>= 14;
+
+if (hasAlpha) {
+A = -0x4000;
+
+for (j = 0; j < lumFilterSize; j++)
+A += alpSrc[j][i] * (unsigned)lumFilter[j];
+
+A >>= 1;
+A += 0x20002000;
+a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14));
+}
+
+Y -= c->yuv2rgb_y_offset;
+Y *= c->yuv2rgb_y_coeff;
+Y += (1 << 13) - (1 << 29);
+R = V * c->yuv2rgb_v2r_coeff;
+G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+B =U * c->yuv2rgb_u2b_coeff;
+
+R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16);
+G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16);
+B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16);
+
+dest32[0] = av_float2int(float_mult * (float)R);
+dest32[1] = av_float2int(float_mult * (float)G);
+dest32[2] = av_float2int(float_mult * (float)B);
+if (alpha)
+dest32[3] = a;
+
+dest32 += pixelStep;
+}
+if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+dest32 = (uint32_t*)dest;
+for (i = 0; i < dstW; i++) {
+dest32[0] = av_bswap32(dest32[0]);
+dest32[1] = av_bswap32(dest32[1]);
+dest32[2] = av_bswap32(dest32[2]);
+if (alpha)
+dest32[3] = av_bswap32(dest32[3]);
+
+dest32 += pixelStep;
+}
+}
+
+}
+
 static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
 const int16_t *ubuf[2], const int16_t *vbuf[2],
@@ -2983,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 }
 break;
 
+case AV_PIX_FMT_RGBF32LE:
+case AV_PIX_FMT_RGBF32BE:
+case AV_PIX_FMT_RGBAF32LE:
+case AV_PIX_FMT_RGBAF32BE:
+*yuv2packedX = yuv2rgbaf32_full_X_c;
+break;
 case AV_PIX_FMT_RGB24: