Re: [FFmpeg-devel] [PATCH v5 4/4] swscale/output: add rgbaf32 output support
On Sun, Dec 4, 2022 at 4:05 PM Michael Niedermayer wrote: > On Wed, Nov 23, 2022 at 11:35:40AM -0800, mindm...@gmail.com wrote: > > From: Mark Reid > > > > --- > > libswscale/output.c | 92 > > libswscale/swscale_unscaled.c| 4 +- > > libswscale/tests/floatimg_cmp.c | 4 +- > > libswscale/utils.c | 16 +++-- > > libswscale/yuv2rgb.c | 2 + > > tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + > > tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + > > tests/ref/fate/filter-pixdesc-rgbf32be | 1 + > > tests/ref/fate/filter-pixdesc-rgbf32le | 1 + > > tests/ref/fate/filter-pixfmts-copy | 4 ++ > > tests/ref/fate/filter-pixfmts-crop | 4 ++ > > tests/ref/fate/filter-pixfmts-field | 4 ++ > > tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ > > tests/ref/fate/filter-pixfmts-hflip | 4 ++ > > tests/ref/fate/filter-pixfmts-il | 4 ++ > > tests/ref/fate/filter-pixfmts-null | 4 ++ > > tests/ref/fate/filter-pixfmts-scale | 4 ++ > > tests/ref/fate/filter-pixfmts-transpose | 4 ++ > > tests/ref/fate/filter-pixfmts-vflip | 4 ++ > > tests/ref/fate/sws-floatimg-cmp | 16 + > > 20 files changed, 170 insertions(+), 8 deletions(-) > > create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be > > create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le > > create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be > > create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le > > > > diff --git a/libswscale/output.c b/libswscale/output.c > > index 5c85bff971..1d86a244f9 100644 > > --- a/libswscale/output.c > > +++ b/libswscale/output.c > > @@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t > *lumFilter, > > } > > } > > > > +static void > > +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, > > +const int16_t **lumSrcx, int lumFilterSize, > > +const int16_t *chrFilter, const int16_t **chrUSrcx, > > +const int16_t **chrVSrcx, int chrFilterSize, > > +const int16_t **alpSrcx, uint8_t *dest, > > +int dstW, int y) > > +{ > > +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); > > +int i; > > +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; > > +int hasAlpha = alpha && alpSrcx; > > +int pixelStep = alpha ? 4 : 3; > > +uint32_t *dest32 = (uint32_t*)dest; > > +const int32_t **lumSrc = (const int32_t**)lumSrcx; > > +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; > > +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; > > +const int32_t **alpSrc = (const int32_t**)alpSrcx; > > +static const float float_mult = 1.0f / 65535.0f; > > +uint32_t a = av_float2int(1.0f); > > + > > +for (i = 0; i < dstW; i++) { > > +int j; > > +int Y = -0x4000; > > +int U = -(128 << 23); > > +int V = -(128 << 23); > > +int R, G, B, A; > > + > > +for (j = 0; j < lumFilterSize; j++) > > +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; > > + > > +for (j = 0; j < chrFilterSize; j++) { > > +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; > > +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; > > +} > > + > > +Y >>= 14; > > +Y += 0x1; > > +U >>= 14; > > +V >>= 14; > > + > > +if (hasAlpha) { > > +A = -0x4000; > > + > > +for (j = 0; j < lumFilterSize; j++) > > +A += alpSrc[j][i] * (unsigned)lumFilter[j]; > > + > > +A >>= 1; > > +A += 0x20002000; > > +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) > >> 14)); > > +} > > + > > +Y -= c->yuv2rgb_y_offset; > > +Y *= c->yuv2rgb_y_coeff; > > +Y += (1 << 13) - (1 << 29); > > +R = V * c->yuv2rgb_v2r_coeff; > > +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; > > +B =U * c->yuv2rgb_u2b_coeff; > > + > > +R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16); > > +G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16); > > +B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16); > > + > > +dest32[0] = av_float2int(float_mult * (float)R); > > +dest32[1] = av_float2int(float_mult * (float)G); > > +dest32[2] = av_float2int(float_mult * (float)B); > > +if (alpha) > > +dest32[3] = a; > > why is this using uint32_t with av_float2int() and not floats straight ? > > It's this way because it is matching the planar f32 version, I will change both. > > > > + > > +dest32 += pixelStep; > > +} > > +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { > > +dest32 = (uint32_t*)dest; > > +for (i = 0; i < dstW; i++) { > > +dest32
Re: [FFmpeg-devel] [PATCH v5 4/4] swscale/output: add rgbaf32 output support
On Wed, Nov 23, 2022 at 11:35:40AM -0800, mindm...@gmail.com wrote: > From: Mark Reid > > --- > libswscale/output.c | 92 > libswscale/swscale_unscaled.c| 4 +- > libswscale/tests/floatimg_cmp.c | 4 +- > libswscale/utils.c | 16 +++-- > libswscale/yuv2rgb.c | 2 + > tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + > tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + > tests/ref/fate/filter-pixdesc-rgbf32be | 1 + > tests/ref/fate/filter-pixdesc-rgbf32le | 1 + > tests/ref/fate/filter-pixfmts-copy | 4 ++ > tests/ref/fate/filter-pixfmts-crop | 4 ++ > tests/ref/fate/filter-pixfmts-field | 4 ++ > tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ > tests/ref/fate/filter-pixfmts-hflip | 4 ++ > tests/ref/fate/filter-pixfmts-il | 4 ++ > tests/ref/fate/filter-pixfmts-null | 4 ++ > tests/ref/fate/filter-pixfmts-scale | 4 ++ > tests/ref/fate/filter-pixfmts-transpose | 4 ++ > tests/ref/fate/filter-pixfmts-vflip | 4 ++ > tests/ref/fate/sws-floatimg-cmp | 16 + > 20 files changed, 170 insertions(+), 8 deletions(-) > create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be > create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le > create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be > create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le > > diff --git a/libswscale/output.c b/libswscale/output.c > index 5c85bff971..1d86a244f9 100644 > --- a/libswscale/output.c > +++ b/libswscale/output.c > @@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t > *lumFilter, > } > } > > +static void > +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, > +const int16_t **lumSrcx, int lumFilterSize, > +const int16_t *chrFilter, const int16_t **chrUSrcx, > +const int16_t **chrVSrcx, int chrFilterSize, > +const int16_t **alpSrcx, uint8_t *dest, > +int dstW, int y) > +{ > +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); > +int i; > +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; > +int hasAlpha = alpha && alpSrcx; > +int pixelStep = alpha ? 4 : 3; > +uint32_t *dest32 = (uint32_t*)dest; > +const int32_t **lumSrc = (const int32_t**)lumSrcx; > +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; > +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; > +const int32_t **alpSrc = (const int32_t**)alpSrcx; > +static const float float_mult = 1.0f / 65535.0f; > +uint32_t a = av_float2int(1.0f); > + > +for (i = 0; i < dstW; i++) { > +int j; > +int Y = -0x4000; > +int U = -(128 << 23); > +int V = -(128 << 23); > +int R, G, B, A; > + > +for (j = 0; j < lumFilterSize; j++) > +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; > + > +for (j = 0; j < chrFilterSize; j++) { > +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; > +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; > +} > + > +Y >>= 14; > +Y += 0x1; > +U >>= 14; > +V >>= 14; > + > +if (hasAlpha) { > +A = -0x4000; > + > +for (j = 0; j < lumFilterSize; j++) > +A += alpSrc[j][i] * (unsigned)lumFilter[j]; > + > +A >>= 1; > +A += 0x20002000; > +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> > 14)); > +} > + > +Y -= c->yuv2rgb_y_offset; > +Y *= c->yuv2rgb_y_coeff; > +Y += (1 << 13) - (1 << 29); > +R = V * c->yuv2rgb_v2r_coeff; > +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; > +B =U * c->yuv2rgb_u2b_coeff; > + > +R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16); > +G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16); > +B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16); > + > +dest32[0] = av_float2int(float_mult * (float)R); > +dest32[1] = av_float2int(float_mult * (float)G); > +dest32[2] = av_float2int(float_mult * (float)B); > +if (alpha) > +dest32[3] = a; why is this using uint32_t with av_float2int() and not floats straight ? > + > +dest32 += pixelStep; > +} > +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { > +dest32 = (uint32_t*)dest; > +for (i = 0; i < dstW; i++) { > +dest32[0] = av_bswap32(dest32[0]); > +dest32[1] = av_bswap32(dest32[1]); > +dest32[2] = av_bswap32(dest32[2]); > +if (alpha) > +dest32[3] = av_bswap32(dest32[3]); > + > +dest32 += pixelStep; > +} > +} teh code in bswapdsp seems more efficient, that should be shared and used ideally thx [...] -- M
[FFmpeg-devel] [PATCH v5 4/4] swscale/output: add rgbaf32 output support
From: Mark Reid --- libswscale/output.c | 92 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 16 +++-- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ tests/ref/fate/sws-floatimg-cmp | 16 + 20 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le diff --git a/libswscale/output.c b/libswscale/output.c index 5c85bff971..1d86a244f9 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, +const int16_t **lumSrcx, int lumFilterSize, +const int16_t *chrFilter, const int16_t **chrUSrcx, +const int16_t **chrVSrcx, int chrFilterSize, +const int16_t **alpSrcx, uint8_t *dest, +int dstW, int y) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); +int i; +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; +int hasAlpha = alpha && alpSrcx; +int pixelStep = alpha ? 4 : 3; +uint32_t *dest32 = (uint32_t*)dest; +const int32_t **lumSrc = (const int32_t**)lumSrcx; +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; +const int32_t **alpSrc = (const int32_t**)alpSrcx; +static const float float_mult = 1.0f / 65535.0f; +uint32_t a = av_float2int(1.0f); + +for (i = 0; i < dstW; i++) { +int j; +int Y = -0x4000; +int U = -(128 << 23); +int V = -(128 << 23); +int R, G, B, A; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; +} + +Y >>= 14; +Y += 0x1; +U >>= 14; +V >>= 14; + +if (hasAlpha) { +A = -0x4000; + +for (j = 0; j < lumFilterSize; j++) +A += alpSrc[j][i] * (unsigned)lumFilter[j]; + +A >>= 1; +A += 0x20002000; +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); +} + +Y -= c->yuv2rgb_y_offset; +Y *= c->yuv2rgb_y_coeff; +Y += (1 << 13) - (1 << 29); +R = V * c->yuv2rgb_v2r_coeff; +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; +B =U * c->yuv2rgb_u2b_coeff; + +R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16); +G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16); +B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16); + +dest32[0] = av_float2int(float_mult * (float)R); +dest32[1] = av_float2int(float_mult * (float)G); +dest32[2] = av_float2int(float_mult * (float)B); +if (alpha) +dest32[3] = a; + +dest32 += pixelStep; +} +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { +dest32 = (uint32_t*)dest; +for (i = 0; i < dstW; i++) { +dest32[0] = av_bswap32(dest32[0]); +dest32[1] = av_bswap32(dest32[1]); +dest32[2] = av_bswap32(dest32[2]); +if (alpha) +dest32[3] = av_bswap32(dest32[3]); + +dest32 += pixelStep; +} +} + +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2983,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, } break; +case AV_PIX_FMT_RGBF32LE: +case AV_PIX_FMT_RGBF32BE: +case AV_PIX_FMT_RGBAF32LE: +case AV_PIX_FMT_RGBAF32BE: +*yuv2packedX = yuv2rgbaf32_full_X_c; +break; case AV_PIX_FMT_RGB24: