[FFmpeg-devel] [PATCH v3 4/4] swscale/bswapdsp: copy over bswapdsp from avcodec
From: Mark Reid There are some places in input.c that could use it too but they aren't currently being pass the SwsContext --- libswscale/Makefile | 1 + libswscale/bswapdsp.c| 59 libswscale/bswapdsp.h| 66 + libswscale/output.c | 36 +++ libswscale/riscv/Makefile| 7 +- libswscale/riscv/bswapdsp_init.c | 46 + libswscale/riscv/bswapdsp_rvb.S | 68 + libswscale/riscv/bswapdsp_rvv.S | 62 libswscale/swscale_internal.h| 3 + libswscale/swscale_unscaled.c| 26 ++--- libswscale/utils.c | 2 + libswscale/x86/Makefile | 6 +- libswscale/x86/bswapdsp.asm | 157 +++ libswscale/x86/bswapdsp_init.c | 40 14 files changed, 537 insertions(+), 42 deletions(-) create mode 100644 libswscale/bswapdsp.c create mode 100644 libswscale/bswapdsp.h create mode 100644 libswscale/riscv/bswapdsp_init.c create mode 100644 libswscale/riscv/bswapdsp_rvb.S create mode 100644 libswscale/riscv/bswapdsp_rvv.S create mode 100644 libswscale/x86/bswapdsp.asm create mode 100644 libswscale/x86/bswapdsp_init.c diff --git a/libswscale/Makefile b/libswscale/Makefile index 757997b401..4a916739c3 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -6,6 +6,7 @@ HEADERS = swscale.h \ version_major.h \ OBJS = alphablend.o \ + bswapdsp.o \ hscale.o \ hscale_fast_bilinear.o \ gamma.o \ diff --git a/libswscale/bswapdsp.c b/libswscale/bswapdsp.c new file mode 100644 index 00..a164d89a76 --- /dev/null +++ b/libswscale/bswapdsp.c @@ -0,0 +1,59 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/bswap.h" +#include "bswapdsp.h" + +static void bswap32_buf(uint32_t *dst, const uint32_t *src, int len) +{ +int i; + +for (i = 0; i + 8 <= len; i += 8) { +dst[i + 0] = av_bswap32(src[i + 0]); +dst[i + 1] = av_bswap32(src[i + 1]); +dst[i + 2] = av_bswap32(src[i + 2]); +dst[i + 3] = av_bswap32(src[i + 3]); +dst[i + 4] = av_bswap32(src[i + 4]); +dst[i + 5] = av_bswap32(src[i + 5]); +dst[i + 6] = av_bswap32(src[i + 6]); +dst[i + 7] = av_bswap32(src[i + 7]); +} +for (; i < len; i++) +dst[i + 0] = av_bswap32(src[i + 0]); +} + +static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len) +{ +while (len--) +*dst++ = av_bswap16(*src++); +} + +av_cold void ff_sws_bswapdsp_init(BswapDSPContext *c) +{ +c->bswap32_buf = bswap32_buf; +c->bswap16_buf = bswap16_buf; + +#if ARCH_RISCV +ff_sws_bswapdsp_init_riscv(c); +#elif ARCH_X86 +ff_sws_bswapdsp_init_x86(c); +#endif +} diff --git a/libswscale/bswapdsp.h b/libswscale/bswapdsp.h new file mode 100644 index 00..f2e12d1b8f --- /dev/null +++ b/libswscale/bswapdsp.h @@ -0,0 +1,66 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef SWSCALE_BSWAPDSP_H +#define SWSCALE_BSWAPDSP_H + +#include + +/** + * @file + * Optimized buffer byte swapping routines. + */ + +typedef struct BswapDSPContext { +/** + * Byte swap 32 bit elements in
[FFmpeg-devel] [PATCH v3 3/4] avcodec/bswapdsp: add documentation
From: Mark Reid --- libavcodec/bswapdsp.h | 33 + 1 file changed, 33 insertions(+) diff --git a/libavcodec/bswapdsp.h b/libavcodec/bswapdsp.h index 10519cfd2e..ecc37d2c56 100644 --- a/libavcodec/bswapdsp.h +++ b/libavcodec/bswapdsp.h @@ -21,11 +21,44 @@ #include +/** + * @file + * Optimized buffer byte swapping routines. + */ + typedef struct BswapDSPContext { +/** + * Byte swap 32 bit elements in a buffer. + * + * @param dst Destination buffer. + * @param src Source buffer, may be the same as dst. + * @param len The number of elements in the buffer. + * + */ +/** @{ */ void (*bswap32_buf)(uint32_t *dst, const uint32_t *src, int len); +/** @} */ + +/** + * Byte swap 16 bit elements in a buffer. + * + * @param dst Destination buffer. + * @param src Source buffer, may be the same as dst. + * @param len The number of elements in the buffer. + * + */ +/** @{ */ void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); +/** @} */ } BswapDSPContext; + +/** + * Initialize BswapDSPContext function pointers. + * + * @param c pointer to BswapDSPContext + * + */ void ff_bswapdsp_init(BswapDSPContext *c); void ff_bswapdsp_init_riscv(BswapDSPContext *c); void ff_bswapdsp_init_x86(BswapDSPContext *c); -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v3 2/4] avcodec/bswapdsp: rename bswap_buf to bswap32_buf
From: Mark Reid --- libavcodec/4xm.c | 8 libavcodec/alsdec.c | 6 +++--- libavcodec/apedec.c | 4 ++-- libavcodec/asvdec.c | 4 ++-- libavcodec/asvenc.c | 4 ++-- libavcodec/bswapdsp.c| 8 libavcodec/bswapdsp.h| 2 +- libavcodec/eatqi.c | 4 ++-- libavcodec/fraps.c | 4 ++-- libavcodec/huffyuvdec.c | 8 libavcodec/huffyuvenc.c | 2 +- libavcodec/imm4.c| 6 +++--- libavcodec/mimic.c | 6 +++--- libavcodec/motionpixels.c| 4 ++-- libavcodec/mpc7.c| 4 ++-- libavcodec/rawdec.c | 2 +- libavcodec/riscv/bswapdsp_init.c | 4 ++-- libavcodec/truemotion2.c | 4 ++-- libavcodec/truespeech.c | 2 +- libavcodec/utvideodec.c | 12 ++-- libavcodec/utvideoenc.c | 6 +++--- libavcodec/x86/bswapdsp.asm | 2 +- libavcodec/x86/bswapdsp_init.c | 6 +++--- libavcodec/ylc.c | 12 ++-- tests/checkasm/bswapdsp.c| 2 +- 25 files changed, 63 insertions(+), 63 deletions(-) diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c index 5636fdef2d..a26b2f9004 100644 --- a/libavcodec/4xm.c +++ b/libavcodec/4xm.c @@ -469,8 +469,8 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length) bitstream_size); if (!f->bitstream_buffer) return AVERROR(ENOMEM); -f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra), - bitstream_size / 4); +f->bbdsp.bswap32_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra), + bitstream_size / 4); init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size); wordstream_offset = extra + bitstream_size; @@ -813,8 +813,8 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length) prestream_size); if (!f->bitstream_buffer) return AVERROR(ENOMEM); -f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream, - prestream_size / 4); +f->bbdsp.bswap32_buf(f->bitstream_buffer, (const uint32_t *) prestream, + prestream_size / 4); init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size); f->last_dc = 0 * 128 * 8 * 8; diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c index 4605b2248f..6f401cf890 100644 --- 
a/libavcodec/alsdec.c +++ b/libavcodec/alsdec.c @@ -1899,9 +1899,9 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame, sample++) *dest++ = av_bswap16(src[sample]); } else { -ctx->bdsp.bswap_buf((uint32_t *) ctx->crc_buffer, -(uint32_t *) frame->data[0], -ctx->cur_frame_length * channels); +ctx->bdsp.bswap32_buf((uint32_t *) ctx->crc_buffer, + (uint32_t *) frame->data[0], + ctx->cur_frame_length * channels); } crc_source = ctx->crc_buffer; } else { diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c index c08d13d6c2..b7856c82e5 100644 --- a/libavcodec/apedec.c +++ b/libavcodec/apedec.c @@ -1499,8 +1499,8 @@ static int ape_decode_frame(AVCodecContext *avctx, AVFrame *frame, av_fast_padded_malloc(&s->data, &s->data_size, buf_size); if (!s->data) return AVERROR(ENOMEM); -s->bdsp.bswap_buf((uint32_t *) s->data, (const uint32_t *) buf, - buf_size >> 2); +s->bdsp.bswap32_buf((uint32_t *) s->data, (const uint32_t *) buf, +buf_size >> 2); memset(s->data + (buf_size & ~3), 0, buf_size & 3); s->ptr = s->data; s->data_end = s->data + buf_size; diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c index 699aab9f8f..25dab7473f 100644 --- a/libavcodec/asvdec.c +++ b/libavcodec/asvdec.c @@ -253,8 +253,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p, if (!a->bitstream_buffer) return AVERROR(ENOMEM); -c->bbdsp.bswap_buf((uint32_t *) a->bitstream_buffer, - (const uint32_t *) buf, buf_size / 4); +c->bbdsp.bswap32_buf((uint32_t *) a->bitstream_buffer, + (const uint32_t *) buf, buf_size / 4); ret = init_get_bits8(&a->gb, a->bitstream_buffer, buf_size); } else { ret = init_get_bits8_le(&a->gb, buf, buf_size); diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c index 9da7cbb986..2bf67d686c 100644 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@ -309,8 +309,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
[FFmpeg-devel] [PATCH v3 1/4] avcodec/bswapdsp: remove unused cextern
From: Mark Reid --- libavcodec/x86/bswapdsp.asm | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm index 31c6c48a21..2aa235e13c 100644 --- a/libavcodec/x86/bswapdsp.asm +++ b/libavcodec/x86/bswapdsp.asm @@ -26,8 +26,6 @@ SECTION_RODATA pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 -cextern pb_80 - SECTION .text ; %1 = aligned/unaligned -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 1/2] avutil: move bswapdsp from avcodec to avutil
From: Mark Reid Also renamed bswap_buf to bswap32_buf --- configure | 56 --- libavcodec/4xm.c | 14 ++--- libavcodec/Makefile | 1 - libavcodec/ac3dec.c | 4 +- libavcodec/ac3dec.h | 4 +- libavcodec/alsdec.c | 12 ++-- libavcodec/apedec.c | 10 ++-- libavcodec/asv.c | 4 +- libavcodec/asv.h | 4 +- libavcodec/asvdec.c | 4 +- libavcodec/asvenc.c | 4 +- libavcodec/cllc.c | 6 +- libavcodec/eamad.c| 6 +- libavcodec/eatqi.c| 10 ++-- libavcodec/exr.c | 6 +- libavcodec/flacenc.c | 6 +- libavcodec/fraps.c| 10 ++-- libavcodec/hevcdec.c | 4 +- libavcodec/hevcdec.h | 4 +- libavcodec/huffyuvdec.c | 14 ++--- libavcodec/huffyuvenc.c | 8 +-- libavcodec/imc.c | 6 +- libavcodec/imm4.c | 12 ++-- libavcodec/mdec.c | 6 +- libavcodec/mimic.c| 12 ++-- libavcodec/mobiclip.c | 6 +- libavcodec/motionpixels.c | 10 ++-- libavcodec/mpc.h | 4 +- libavcodec/mpc7.c | 8 +-- libavcodec/rawdec.c | 8 +-- libavcodec/riscv/Makefile | 3 - libavcodec/shorten.c | 7 ++- libavcodec/truemotion2.c | 11 ++-- libavcodec/truespeech.c | 8 +-- libavcodec/utvideo.h | 4 +- libavcodec/utvideodec.c | 16 +++--- libavcodec/utvideoenc.c | 10 ++-- libavcodec/x86/Makefile | 2 - libavcodec/ylc.c | 18 +++--- libavutil/Makefile| 2 + {libavcodec => libavutil}/bswapdsp.c | 17 +++--- {libavcodec => libavutil}/bswapdsp.h | 16 +++--- libavutil/riscv/Makefile | 7 ++- .../riscv/bswapdsp_init.c | 9 +-- .../riscv/bswapdsp_rvb.S | 0 .../riscv/bswapdsp_rvv.S | 0 libavutil/version.h | 2 +- libavutil/x86/Makefile| 8 ++- {libavcodec => libavutil}/x86/bswapdsp.asm| 4 +- {libavcodec => libavutil}/x86/bswapdsp_init.c | 17 +++--- tests/checkasm/Makefile | 2 +- tests/checkasm/bswapdsp.c | 8 +-- tests/checkasm/checkasm.c | 4 +- 53 files changed, 216 insertions(+), 222 deletions(-) rename {libavcodec => libavutil}/bswapdsp.c (80%) rename {libavcodec => libavutil}/bswapdsp.h (71%) rename {libavcodec => libavutil}/riscv/bswapdsp_init.c (85%) rename {libavcodec => libavutil}/riscv/bswapdsp_rvb.S (100%) rename {libavcodec => libavutil}/riscv/bswapdsp_rvv.S 
(100%) rename {libavcodec => libavutil}/x86/bswapdsp.asm (97%) rename {libavcodec => libavutil}/x86/bswapdsp_init.c (80%) diff --git a/configure b/configure index f4eedfc207..03b502ea37 100755 --- a/configure +++ b/configure @@ -2421,7 +2421,6 @@ CONFIG_EXTRA=" audio_frame_queue audiodsp blockdsp -bswapdsp cabac cbs cbs_av1 @@ -2749,8 +2748,8 @@ aac_decoder_select="adts_header mpeg4audio sinewin" aac_fixed_decoder_select="adts_header mpeg4audio" aac_encoder_select="audio_frame_queue iirfilter lpc sinewin" aac_latm_decoder_select="aac_decoder aac_latm_parser" -ac3_decoder_select="ac3_parser ac3dsp bswapdsp fmtconvert" -ac3_fixed_decoder_select="ac3_parser ac3dsp bswapdsp" +ac3_decoder_select="ac3_parser ac3dsp fmtconvert" +ac3_fixed_decoder_select="ac3_parser ac3dsp" ac3_encoder_select="ac3dsp audiodsp me_cmp" ac3_fixed_encoder_select="ac3dsp audiodsp me_cmp" acelp_kelvin_decoder_select="audiodsp" @@ -2759,20 +2758,20 @@ adpcm_g722_encoder_select="g722dsp" agm_decoder_select="idctdsp" aic_decoder_select="golomb idctdsp" alac_encoder_select="lpc" -als_decoder_select="bswapdsp mpeg4audio" +als_decoder_select="mpeg4audio" amrnb_decoder_select="lsp" amrwb_decoder_select="lsp" amv_decoder_select="sp5x_decoder exif" amv_encoder_select="jpegtables mpegvideoenc" -ape_decoder_select="bswapdsp llauddsp" +ape_decoder_select="llauddsp" apng_decoder_select="inflate_wrapper" apng_encoder_select="deflate_wrapper llvidencdsp" aptx_encoder_select="audio_frame_queue" aptx_hd_encoder_select="audio_frame_queue" -asv1_decoder_select="blockdsp bswapdsp idctdsp" -asv1_enc
[FFmpeg-devel] [PATCH v2 2/2] libswscale: add AVBSwapDSPContext and use it
From: Mark Reid There are some places in input.c that could use it too but they aren't currently being pass the SwsContext --- libswscale/output.c | 36 +++ libswscale/swscale_internal.h | 3 +++ libswscale/swscale_unscaled.c | 26 + libswscale/utils.c| 2 ++ 4 files changed, 29 insertions(+), 38 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index 5c85bff971..cd44081e3d 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2313,13 +2313,11 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, } } if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { -for (i = 0; i < dstW; i++) { -dest16[0][i] = av_bswap16(dest16[0][i]); -dest16[1][i] = av_bswap16(dest16[1][i]); -dest16[2][i] = av_bswap16(dest16[2][i]); -if (hasAlpha) -dest16[3][i] = av_bswap16(dest16[3][i]); -} +c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW); +c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW); +c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW); +if (hasAlpha) +c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW); } } @@ -2385,13 +2383,11 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter, dest16[3][i] = av_clip_uintp2(A, 30) >> 14; } if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { -for (i = 0; i < dstW; i++) { -dest16[0][i] = av_bswap16(dest16[0][i]); -dest16[1][i] = av_bswap16(dest16[1][i]); -dest16[2][i] = av_bswap16(dest16[2][i]); -if (hasAlpha) -dest16[3][i] = av_bswap16(dest16[3][i]); -} +c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW); +c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW); +c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW); +if (hasAlpha) +c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW); } } @@ -2461,13 +2457,11 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); } if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { -for (i = 0; i < dstW; i++) { -dest32[0][i] = av_bswap32(dest32[0][i]); -dest32[1][i] = av_bswap32(dest32[1][i]); 
-dest32[2][i] = av_bswap32(dest32[2][i]); -if (hasAlpha) -dest32[3][i] = av_bswap32(dest32[3][i]); -} +c->bsdsp.bswap32_buf(dest32[0], dest32[0], dstW); +c->bsdsp.bswap32_buf(dest32[1], dest32[1], dstW); +c->bsdsp.bswap32_buf(dest32[2], dest32[2], dstW); +if (hasAlpha) +c->bsdsp.bswap32_buf(dest32[3], dest32[3], dstW); } } diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index abeebbb002..400f0bc8ed 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -26,6 +26,7 @@ #include "config.h" #include "libavutil/avassert.h" +#include "libavutil/bswapdsp.h" #include "libavutil/common.h" #include "libavutil/frame.h" #include "libavutil/intreadwrite.h" @@ -682,6 +683,8 @@ typedef struct SwsContext { atomic_int data_unaligned_warned; Half2FloatTables *h2f_tables; + +AVBSwapDSPContext bsdsp; } SwsContext; //FIXME check init (where 0) diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 9af2e7ecc3..0010ab24d1 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -468,7 +468,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { -int i, j, p; +int i, p; for (p = 0; p < 4; p++) { int srcstr = srcStride[p] / 2; @@ -480,9 +480,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[], continue; dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr; for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) { -for (j = 0; j < min_stride; j++) { -dstPtr[j] = av_bswap16(srcPtr[j]); -} +c->bsdsp.bswap16_buf(dstPtr, srcPtr, min_stride); srcPtr += srcstr; dstPtr += dststr; } @@ -495,7 +493,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { -int i, j, p; +int i, p; for (p = 0; p < 4; p++) { int srcstr = srcStride[p] / 4; @@ -507,9 +505,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t 
*src[], continue; dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr; for (i = 0; i < (srcSliceH >> c
[FFmpeg-devel] [PATCH 1/2] avutil: move bswapdsp from avcodec to avutil
From: Mark Reid Also renamed bswap_buf to bswap32_buf --- configure | 56 --- libavcodec/4xm.c | 14 ++--- libavcodec/Makefile | 1 - libavcodec/ac3dec.c | 4 +- libavcodec/ac3dec.h | 4 +- libavcodec/alsdec.c | 12 ++-- libavcodec/apedec.c | 10 ++-- libavcodec/asv.c | 4 +- libavcodec/asv.h | 4 +- libavcodec/asvdec.c | 4 +- libavcodec/asvenc.c | 4 +- libavcodec/cllc.c | 6 +- libavcodec/eamad.c| 6 +- libavcodec/eatqi.c| 10 ++-- libavcodec/exr.c | 6 +- libavcodec/flacenc.c | 6 +- libavcodec/fraps.c| 10 ++-- libavcodec/hevcdec.c | 4 +- libavcodec/hevcdec.h | 4 +- libavcodec/huffyuvdec.c | 14 ++--- libavcodec/huffyuvenc.c | 8 +-- libavcodec/imc.c | 6 +- libavcodec/imm4.c | 12 ++-- libavcodec/mdec.c | 6 +- libavcodec/mimic.c| 12 ++-- libavcodec/mobiclip.c | 6 +- libavcodec/motionpixels.c | 10 ++-- libavcodec/mpc.h | 4 +- libavcodec/mpc7.c | 8 +-- libavcodec/rawdec.c | 8 +-- libavcodec/riscv/Makefile | 3 - libavcodec/shorten.c | 7 ++- libavcodec/truemotion2.c | 11 ++-- libavcodec/truespeech.c | 8 +-- libavcodec/utvideo.h | 4 +- libavcodec/utvideodec.c | 16 +++--- libavcodec/utvideoenc.c | 10 ++-- libavcodec/x86/Makefile | 2 - libavcodec/ylc.c | 18 +++--- libavutil/Makefile| 2 + {libavcodec => libavutil}/bswapdsp.c | 17 +++--- {libavcodec => libavutil}/bswapdsp.h | 16 +++--- libavutil/riscv/Makefile | 7 ++- .../riscv/bswapdsp_init.c | 9 +-- .../riscv/bswapdsp_rvb.S | 0 .../riscv/bswapdsp_rvv.S | 0 libavutil/version.h | 2 +- libavutil/x86/Makefile| 8 ++- {libavcodec => libavutil}/x86/bswapdsp.asm| 2 +- {libavcodec => libavutil}/x86/bswapdsp_init.c | 17 +++--- tests/checkasm/Makefile | 2 +- tests/checkasm/bswapdsp.c | 8 +-- tests/checkasm/checkasm.c | 4 +- 53 files changed, 216 insertions(+), 220 deletions(-) rename {libavcodec => libavutil}/bswapdsp.c (80%) rename {libavcodec => libavutil}/bswapdsp.h (71%) rename {libavcodec => libavutil}/riscv/bswapdsp_init.c (85%) rename {libavcodec => libavutil}/riscv/bswapdsp_rvb.S (100%) rename {libavcodec => libavutil}/riscv/bswapdsp_rvv.S 
(100%) rename {libavcodec => libavutil}/x86/bswapdsp.asm (98%) rename {libavcodec => libavutil}/x86/bswapdsp_init.c (80%) diff --git a/configure b/configure index f4eedfc207..03b502ea37 100755 --- a/configure +++ b/configure @@ -2421,7 +2421,6 @@ CONFIG_EXTRA=" audio_frame_queue audiodsp blockdsp -bswapdsp cabac cbs cbs_av1 @@ -2749,8 +2748,8 @@ aac_decoder_select="adts_header mpeg4audio sinewin" aac_fixed_decoder_select="adts_header mpeg4audio" aac_encoder_select="audio_frame_queue iirfilter lpc sinewin" aac_latm_decoder_select="aac_decoder aac_latm_parser" -ac3_decoder_select="ac3_parser ac3dsp bswapdsp fmtconvert" -ac3_fixed_decoder_select="ac3_parser ac3dsp bswapdsp" +ac3_decoder_select="ac3_parser ac3dsp fmtconvert" +ac3_fixed_decoder_select="ac3_parser ac3dsp" ac3_encoder_select="ac3dsp audiodsp me_cmp" ac3_fixed_encoder_select="ac3dsp audiodsp me_cmp" acelp_kelvin_decoder_select="audiodsp" @@ -2759,20 +2758,20 @@ adpcm_g722_encoder_select="g722dsp" agm_decoder_select="idctdsp" aic_decoder_select="golomb idctdsp" alac_encoder_select="lpc" -als_decoder_select="bswapdsp mpeg4audio" +als_decoder_select="mpeg4audio" amrnb_decoder_select="lsp" amrwb_decoder_select="lsp" amv_decoder_select="sp5x_decoder exif" amv_encoder_select="jpegtables mpegvideoenc" -ape_decoder_select="bswapdsp llauddsp" +ape_decoder_select="llauddsp" apng_decoder_select="inflate_wrapper" apng_encoder_select="deflate_wrapper llvidencdsp" aptx_encoder_select="audio_frame_queue" aptx_hd_encoder_select="audio_frame_queue" -asv1_decoder_select="blockdsp bswapdsp idctdsp" -asv1_enc
[FFmpeg-devel] [PATCH 2/2] libswscale: add AVBSwapDSPContext and use it
From: Mark Reid There are some places in input.c that could use it too but they aren't currently being pass the SwsContext --- libswscale/output.c | 36 +++ libswscale/swscale_internal.h | 3 +++ libswscale/swscale_unscaled.c | 26 + libswscale/utils.c| 2 ++ 4 files changed, 29 insertions(+), 38 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index 5c85bff971..cd44081e3d 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2313,13 +2313,11 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, } } if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { -for (i = 0; i < dstW; i++) { -dest16[0][i] = av_bswap16(dest16[0][i]); -dest16[1][i] = av_bswap16(dest16[1][i]); -dest16[2][i] = av_bswap16(dest16[2][i]); -if (hasAlpha) -dest16[3][i] = av_bswap16(dest16[3][i]); -} +c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW); +c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW); +c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW); +if (hasAlpha) +c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW); } } @@ -2385,13 +2383,11 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter, dest16[3][i] = av_clip_uintp2(A, 30) >> 14; } if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { -for (i = 0; i < dstW; i++) { -dest16[0][i] = av_bswap16(dest16[0][i]); -dest16[1][i] = av_bswap16(dest16[1][i]); -dest16[2][i] = av_bswap16(dest16[2][i]); -if (hasAlpha) -dest16[3][i] = av_bswap16(dest16[3][i]); -} +c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW); +c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW); +c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW); +if (hasAlpha) +c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW); } } @@ -2461,13 +2457,11 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); } if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { -for (i = 0; i < dstW; i++) { -dest32[0][i] = av_bswap32(dest32[0][i]); -dest32[1][i] = av_bswap32(dest32[1][i]); 
-dest32[2][i] = av_bswap32(dest32[2][i]); -if (hasAlpha) -dest32[3][i] = av_bswap32(dest32[3][i]); -} +c->bsdsp.bswap32_buf(dest32[0], dest32[0], dstW); +c->bsdsp.bswap32_buf(dest32[1], dest32[1], dstW); +c->bsdsp.bswap32_buf(dest32[2], dest32[2], dstW); +if (hasAlpha) +c->bsdsp.bswap32_buf(dest32[3], dest32[3], dstW); } } diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index abeebbb002..400f0bc8ed 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -26,6 +26,7 @@ #include "config.h" #include "libavutil/avassert.h" +#include "libavutil/bswapdsp.h" #include "libavutil/common.h" #include "libavutil/frame.h" #include "libavutil/intreadwrite.h" @@ -682,6 +683,8 @@ typedef struct SwsContext { atomic_int data_unaligned_warned; Half2FloatTables *h2f_tables; + +AVBSwapDSPContext bsdsp; } SwsContext; //FIXME check init (where 0) diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 9af2e7ecc3..0010ab24d1 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -468,7 +468,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { -int i, j, p; +int i, p; for (p = 0; p < 4; p++) { int srcstr = srcStride[p] / 2; @@ -480,9 +480,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[], continue; dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr; for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) { -for (j = 0; j < min_stride; j++) { -dstPtr[j] = av_bswap16(srcPtr[j]); -} +c->bsdsp.bswap16_buf(dstPtr, srcPtr, min_stride); srcPtr += srcstr; dstPtr += dststr; } @@ -495,7 +493,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { -int i, j, p; +int i, p; for (p = 0; p < 4; p++) { int srcstr = srcStride[p] / 4; @@ -507,9 +505,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t 
*src[], continue; dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr; for (i = 0; i < (srcSliceH >> c
[FFmpeg-devel] [PATCH v5 4/4] swscale/output: add rgbaf32 output support
From: Mark Reid --- libswscale/output.c | 92 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 16 +++-- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ tests/ref/fate/sws-floatimg-cmp | 16 + 20 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le diff --git a/libswscale/output.c b/libswscale/output.c index 5c85bff971..1d86a244f9 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, +const int16_t **lumSrcx, int lumFilterSize, +const int16_t *chrFilter, const int16_t **chrUSrcx, +const int16_t **chrVSrcx, int chrFilterSize, +const int16_t **alpSrcx, uint8_t *dest, +int dstW, int y) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); +int i; +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; +int hasAlpha = alpha && alpSrcx; +int pixelStep = alpha ? 
4 : 3; +uint32_t *dest32 = (uint32_t*)dest; +const int32_t **lumSrc = (const int32_t**)lumSrcx; +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; +const int32_t **alpSrc = (const int32_t**)alpSrcx; +static const float float_mult = 1.0f / 65535.0f; +uint32_t a = av_float2int(1.0f); + +for (i = 0; i < dstW; i++) { +int j; +int Y = -0x4000; +int U = -(128 << 23); +int V = -(128 << 23); +int R, G, B, A; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; +} + +Y >>= 14; +Y += 0x1; +U >>= 14; +V >>= 14; + +if (hasAlpha) { +A = -0x4000; + +for (j = 0; j < lumFilterSize; j++) +A += alpSrc[j][i] * (unsigned)lumFilter[j]; + +A >>= 1; +A += 0x20002000; +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); +} + +Y -= c->yuv2rgb_y_offset; +Y *= c->yuv2rgb_y_coeff; +Y += (1 << 13) - (1 << 29); +R = V * c->yuv2rgb_v2r_coeff; +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; +B =U * c->yuv2rgb_u2b_coeff; + +R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16); +G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16); +B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16); + +dest32[0] = av_float2int(float_mult * (float)R); +dest32[1] = av_float2int(float_mult * (float)G); +dest32[2] = av_float2int(float_mult * (float)B); +if (alpha) +dest32[3] = a; + +dest32 += pixelStep; +} +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { +dest32 = (uint32_t*)dest; +for (i = 0; i < dstW; i++) { +dest32[0] = av_bswap32(dest32[0]); +dest32[1] = av_bswap32(dest32[1]); +dest32[2] = av_bswap32(dest32[2]); +if (alpha) +dest32[3] = av_bswap32(dest32[3]); + +dest32 += pixelStep; +} +} + +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2983,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, 
} break; +case AV_PIX_FMT_RGBF32LE: +case AV_PIX_FMT_RGBF32BE: +case AV_PIX_FMT_RGBAF32LE: +case AV_PIX_FMT_RGBAF32BE: +*yuv2packedX = yuv2rgbaf32_full_X_c; +break; case AV_PIX_FMT_RGB24:
[FFmpeg-devel] [PATCH v5 3/4] avfilter/vf_transpose: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_transpose.c | 44 ++ 1 file changed, 44 insertions(+) diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c index 469e66729f..1023d6fe82 100644 --- a/libavfilter/vf_transpose.c +++ b/libavfilter/vf_transpose.c @@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t src_linesize, transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8); } +static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize, +int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 12) { +for (x = 0; x < w; x++) { +*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + x*src_linesize)); +*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + x*src_linesize)); +*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + + +static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize, + int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 16) { +for (x = 0; x < w; x++) { +*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + x*src_linesize)); +*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + static int config_props_output(AVFilterLink *outlink) { AVFilterContext *ctx = outlink->src; @@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink) v->transpose_8x8 = transpose_8x8_48_c; break; case 8: v->transpose_block = transpose_block_64_c; v->transpose_8x8 = transpose_8x8_64_c; break; +case 12: v->transpose_block = 
transpose_block_96_c; + v->transpose_8x8 = transpose_8x8_96_c; break; +case 16: v->transpose_block = transpose_block_128_c; + v->transpose_8x8 = transpose_8x8_128_c; break; } } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v5 2/4] avfilter/vf_hflip: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_hflip_init.h | 25 + 1 file changed, 25 insertions(+) diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h index d0319f463d..31173f73fc 100644 --- a/libavfilter/vf_hflip_init.h +++ b/libavfilter/vf_hflip_init.h @@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t *ddst, int w) dst[j] = src[-j]; } +static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint32_t *in = (const uint32_t *)ssrc; +uint32_t *out = (uint32_t *)ddst; + +for (int j = 0; j < w; j++, out += 3, in -= 3) { +out[0] = in[0]; +out[1] = in[1]; +out[2] = in[2]; +} +} + +static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint64_t *in = (const uint64_t *)ssrc; +uint64_t *out = (uint64_t *)ddst; + +for (int j = 0; j < w; j++, out += 2, in -= 2) { +out[0] = in[0]; +out[1] = in[1]; +} +} + static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) { for (int i = 0; i < nb_planes; i++) { @@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) case 4: s->flip_line[i] = hflip_dword_c; break; case 6: s->flip_line[i] = hflip_b48_c; break; case 8: s->flip_line[i] = hflip_qword_c; break; +case 12: s->flip_line[i] = hflip_b96_c; break; +case 16: s->flip_line[i] = hflip_b128_c; break; default: return AVERROR_BUG; } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v5 1/4] swscale/input: add rgbaf32 input support
From: Mark Reid The input functions are currently matching the planar f32 functions. They can be factorized removing multiple lrintf/av_clipf calls, this will be addressed in a future patch. --- libswscale/input.c | 122 + libswscale/utils.c | 6 +++ 2 files changed, 128 insertions(+) diff --git a/libswscale/input.c b/libswscale/input.c index d5676062a2..9c9eb31cde 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1284,6 +1284,98 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, cons rgbaf16_funcs_endian(le, 0) rgbaf16_funcs_endian(be, 1) +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): av_int2float(AV_RL32(&src))) + +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, +const float *src, int width, +int32_t *rgb2yuv, int comp) +{ +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int i; +/*TODO: refactor these f32 conversions to only have one lrintf and av_clipf call*/ +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float *src, int is_be, + int width, int32_t *rgb2yuv, int comp) +{ +int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +int i; +/*TODO: refactor these f32 conversions to only have one lrintf and av_clipf call*/ +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = 
lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float *src, int is_be, + int width, void *opq) +{ +int i; +for (i=0; isrcFormat; @@ -1663,6 +1755,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_c; break; +case AV_PIX_FMT_RGBF32BE: +c->chrToYV12 = rgbf32beToUV_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->chrToYV12 = rgbaf32beToUV_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->chrToYV12 = rgbf32leToUV_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->chrToYV12 = rgbaf32leToUV_c; +break; } } @@ -1973,6 +2077,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->lumToYV12 = rgbaf16leToY_c; break; +case AV_PIX_FMT_RGBF32BE: +c->lumToYV12 = rgbf32beToY_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->lumToYV12 = rgbaf32beToY_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->lumToYV12 = rgbf32leToY_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->lumToYV12 = rgbaf32leToY_c; +break; } if (c->needAlpha) { if (is16BPS(srcFormat) || isNBPS(srcFormat)) { @@ -1998,6 +2114,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->alpToYV12 = rgbaf16leToA_c; break; +case AV_PIX_FMT_RGBAF32BE: +c->alpToYV12 = rgbaf32beToA_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->alpToYV12 = rgbaf32leToA_c; +break; case AV_PIX_FMT_YA8: c->alpToYV12 = uyvyToY_c; break; diff --git a/libswscale/utils.c b/libswscale/utils.c index 85640a143f..2c520f68d1 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -266,6 +266,10 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_VUYX]= { 1, 1 }, [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, +[AV_PIX_FMT_RGBF32BE]= { 1, 0 }, +[AV_PIX_FMT_RGBF32LE]= { 1, 0 }, +[AV_PIX_FMT_RGBAF32BE] = { 1, 0 }, +[AV_PIX_FMT_RGBAF32LE] = { 1, 0 }, [AV_PIX_FMT_XV30LE] = { 
1, 1 }, [AV_PIX_FMT_XV36LE] = { 1, 1 }, }; @@ -1572,6 +1576,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilte
[FFmpeg-devel] [PATCH v5 0/4] swscale: rgbaf32 input/output support
From: Mark Reid This patch series adds swscale input/output support for the packed rgb float formats. A few of the filters also needed to support the larger 96/128-bit packed pixel sizes. I also plan to eventually add lossless unscaled conversions between the planar and packed formats. changes since v4 * added comment about refactoring input functions changes since v3 * removed half uv path implementation changes since v2 * add bias to rgbaf32 output to improve non overflowing range changes since v1 * output correct alpha if src doesn't have alpha Mark Reid (4): swscale/input: add rgbaf32 input support avfilter/vf_hflip: add support for packed rgb float formats avfilter/vf_transpose: add support for packed rgb float formats swscale/output: add rgbaf32 output support libavfilter/vf_hflip_init.h | 25 + libavfilter/vf_transpose.c | 44 libswscale/input.c | 122 +++ libswscale/output.c | 92 + libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 14 ++- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 + tests/ref/fate/filter-pixfmts-crop | 4 + tests/ref/fate/filter-pixfmts-field | 4 + tests/ref/fate/filter-pixfmts-fieldorder | 4 + tests/ref/fate/filter-pixfmts-hflip | 4 + tests/ref/fate/filter-pixfmts-il | 4 + tests/ref/fate/filter-pixfmts-null | 4 + tests/ref/fate/filter-pixfmts-scale | 4 + tests/ref/fate/filter-pixfmts-transpose | 4 + tests/ref/fate/filter-pixfmts-vflip | 4 + tests/ref/fate/sws-floatimg-cmp | 16 +++ 23 files changed, 363 insertions(+), 4 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le -- 2.31.1.windows.1 ___ ffmpeg-devel
mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v4 4/4] swscale/output: add rgbaf32 output support
From: Mark Reid --- libswscale/output.c | 92 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 16 +++-- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ tests/ref/fate/sws-floatimg-cmp | 16 + 20 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le diff --git a/libswscale/output.c b/libswscale/output.c index 5c85bff971..1d86a244f9 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, +const int16_t **lumSrcx, int lumFilterSize, +const int16_t *chrFilter, const int16_t **chrUSrcx, +const int16_t **chrVSrcx, int chrFilterSize, +const int16_t **alpSrcx, uint8_t *dest, +int dstW, int y) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); +int i; +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; +int hasAlpha = alpha && alpSrcx; +int pixelStep = alpha ? 
4 : 3; +uint32_t *dest32 = (uint32_t*)dest; +const int32_t **lumSrc = (const int32_t**)lumSrcx; +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; +const int32_t **alpSrc = (const int32_t**)alpSrcx; +static const float float_mult = 1.0f / 65535.0f; +uint32_t a = av_float2int(1.0f); + +for (i = 0; i < dstW; i++) { +int j; +int Y = -0x4000; +int U = -(128 << 23); +int V = -(128 << 23); +int R, G, B, A; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; +} + +Y >>= 14; +Y += 0x1; +U >>= 14; +V >>= 14; + +if (hasAlpha) { +A = -0x4000; + +for (j = 0; j < lumFilterSize; j++) +A += alpSrc[j][i] * (unsigned)lumFilter[j]; + +A >>= 1; +A += 0x20002000; +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); +} + +Y -= c->yuv2rgb_y_offset; +Y *= c->yuv2rgb_y_coeff; +Y += (1 << 13) - (1 << 29); +R = V * c->yuv2rgb_v2r_coeff; +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; +B =U * c->yuv2rgb_u2b_coeff; + +R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16); +G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16); +B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16); + +dest32[0] = av_float2int(float_mult * (float)R); +dest32[1] = av_float2int(float_mult * (float)G); +dest32[2] = av_float2int(float_mult * (float)B); +if (alpha) +dest32[3] = a; + +dest32 += pixelStep; +} +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { +dest32 = (uint32_t*)dest; +for (i = 0; i < dstW; i++) { +dest32[0] = av_bswap32(dest32[0]); +dest32[1] = av_bswap32(dest32[1]); +dest32[2] = av_bswap32(dest32[2]); +if (alpha) +dest32[3] = av_bswap32(dest32[3]); + +dest32 += pixelStep; +} +} + +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2983,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, 
} break; +case AV_PIX_FMT_RGBF32LE: +case AV_PIX_FMT_RGBF32BE: +case AV_PIX_FMT_RGBAF32LE: +case AV_PIX_FMT_RGBAF32BE: +*yuv2packedX = yuv2rgbaf32_full_X_c; +break; case AV_PIX_FMT_RGB24:
[FFmpeg-devel] [PATCH v4 3/4] avfilter/vf_transpose: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_transpose.c | 44 ++ 1 file changed, 44 insertions(+) diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c index 469e66729f..1023d6fe82 100644 --- a/libavfilter/vf_transpose.c +++ b/libavfilter/vf_transpose.c @@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t src_linesize, transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8); } +static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize, +int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 12) { +for (x = 0; x < w; x++) { +*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + x*src_linesize)); +*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + x*src_linesize)); +*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + + +static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize, + int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 16) { +for (x = 0; x < w; x++) { +*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + x*src_linesize)); +*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + static int config_props_output(AVFilterLink *outlink) { AVFilterContext *ctx = outlink->src; @@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink) v->transpose_8x8 = transpose_8x8_48_c; break; case 8: v->transpose_block = transpose_block_64_c; v->transpose_8x8 = transpose_8x8_64_c; break; +case 12: v->transpose_block = 
transpose_block_96_c; + v->transpose_8x8 = transpose_8x8_96_c; break; +case 16: v->transpose_block = transpose_block_128_c; + v->transpose_8x8 = transpose_8x8_128_c; break; } } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v4 2/4] avfilter/vf_hflip: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_hflip_init.h | 25 + 1 file changed, 25 insertions(+) diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h index d0319f463d..31173f73fc 100644 --- a/libavfilter/vf_hflip_init.h +++ b/libavfilter/vf_hflip_init.h @@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t *ddst, int w) dst[j] = src[-j]; } +static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint32_t *in = (const uint32_t *)ssrc; +uint32_t *out = (uint32_t *)ddst; + +for (int j = 0; j < w; j++, out += 3, in -= 3) { +out[0] = in[0]; +out[1] = in[1]; +out[2] = in[2]; +} +} + +static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint64_t *in = (const uint64_t *)ssrc; +uint64_t *out = (uint64_t *)ddst; + +for (int j = 0; j < w; j++, out += 2, in -= 2) { +out[0] = in[0]; +out[1] = in[1]; +} +} + static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) { for (int i = 0; i < nb_planes; i++) { @@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) case 4: s->flip_line[i] = hflip_dword_c; break; case 6: s->flip_line[i] = hflip_b48_c; break; case 8: s->flip_line[i] = hflip_qword_c; break; +case 12: s->flip_line[i] = hflip_b96_c; break; +case 16: s->flip_line[i] = hflip_b128_c; break; default: return AVERROR_BUG; } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v4 1/4] swscale/input: add rgbaf32 input support
From: Mark Reid --- libswscale/input.c | 120 + libswscale/utils.c | 6 +++ 2 files changed, 126 insertions(+) diff --git a/libswscale/input.c b/libswscale/input.c index d5676062a2..a305be5ac2 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1284,6 +1284,96 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, cons rgbaf16_funcs_endian(le, 0) rgbaf16_funcs_endian(be, 1) +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): av_int2float(AV_RL32(&src))) + +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, +const float *src, int width, +int32_t *rgb2yuv, int comp) +{ +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float *src, int is_be, + int width, int32_t *rgb2yuv, int comp) +{ +int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float *src, int is_be, + int width, void *opq) +{ +int i; +for (i=0; isrcFormat; @@ -1663,6 +1753,18 @@ av_cold void 
ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_c; break; +case AV_PIX_FMT_RGBF32BE: +c->chrToYV12 = rgbf32beToUV_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->chrToYV12 = rgbaf32beToUV_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->chrToYV12 = rgbf32leToUV_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->chrToYV12 = rgbaf32leToUV_c; +break; } } @@ -1973,6 +2075,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->lumToYV12 = rgbaf16leToY_c; break; +case AV_PIX_FMT_RGBF32BE: +c->lumToYV12 = rgbf32beToY_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->lumToYV12 = rgbaf32beToY_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->lumToYV12 = rgbf32leToY_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->lumToYV12 = rgbaf32leToY_c; +break; } if (c->needAlpha) { if (is16BPS(srcFormat) || isNBPS(srcFormat)) { @@ -1998,6 +2112,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->alpToYV12 = rgbaf16leToA_c; break; +case AV_PIX_FMT_RGBAF32BE: +c->alpToYV12 = rgbaf32beToA_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->alpToYV12 = rgbaf32leToA_c; +break; case AV_PIX_FMT_YA8: c->alpToYV12 = uyvyToY_c; break; diff --git a/libswscale/utils.c b/libswscale/utils.c index 85640a143f..2c520f68d1 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -266,6 +266,10 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_VUYX]= { 1, 1 }, [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, +[AV_PIX_FMT_RGBF32BE]= { 1, 0 }, +[AV_PIX_FMT_RGBF32LE]= { 1, 0 }, +[AV_PIX_FMT_RGBAF32BE] = { 1, 0 }, +[AV_PIX_FMT_RGBAF32LE] = { 1, 0 }, [AV_PIX_FMT_XV30LE] = { 1, 1 }, [AV_PIX_FMT_XV36LE] = { 1, 1 }, }; @@ -1572,6 +1576,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, srcFormat != AV_PIX_FMT_GBRAP16BE && srcFormat != AV_PIX_FMT_GBRAP16LE && srcFormat != AV_PIX_FMT_GBRPF32BE && srcFormat != AV_PIX_FMT_GBRPF32LE && srcFormat != AV_PIX_FMT_GBRAPF32BE && srcFormat != 
AV_PIX_FMT_GBRAPF32LE && +srcFormat != AV_PIX_FMT_RGBF32BE && srcFormat != AV_PIX_FMT_RGBF32LE
[FFmpeg-devel] [PATCH v4 0/4] swscale: rgbaf32 input/output support
From: Mark Reid This patch series adds swscale input/output support for the packed rgb float formats. A few of the filters also needed to support the larger 96/128-bit packed pixel sizes. I also plan to eventually add lossless unscaled conversions between the planar and packed formats. changes since v3 * removed half uv path implementation changes since v2 * add bias to rgbaf32 output to improve non overflowing range changes since v1 * output correct alpha if src doesn't have alpha Mark Reid (4): swscale/input: add rgbaf32 input support avfilter/vf_hflip: add support for packed rgb float formats avfilter/vf_transpose: add support for packed rgb float formats swscale/output: add rgbaf32 output support libavfilter/vf_hflip_init.h | 25 + libavfilter/vf_transpose.c | 44 + libswscale/input.c | 120 +++ libswscale/output.c | 92 + libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 14 ++- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 + tests/ref/fate/filter-pixfmts-crop | 4 + tests/ref/fate/filter-pixfmts-field | 4 + tests/ref/fate/filter-pixfmts-fieldorder | 4 + tests/ref/fate/filter-pixfmts-hflip | 4 + tests/ref/fate/filter-pixfmts-il | 4 + tests/ref/fate/filter-pixfmts-null | 4 + tests/ref/fate/filter-pixfmts-scale | 4 + tests/ref/fate/filter-pixfmts-transpose | 4 + tests/ref/fate/filter-pixfmts-vflip | 4 + tests/ref/fate/sws-floatimg-cmp | 16 +++ 23 files changed, 361 insertions(+), 4 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v3 4/4] swscale/output: add rgbaf32 output support
From: Mark Reid --- libswscale/output.c | 92 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 16 +++-- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ tests/ref/fate/sws-floatimg-cmp | 16 + 20 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le diff --git a/libswscale/output.c b/libswscale/output.c index 5c85bff971..1d86a244f9 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, +const int16_t **lumSrcx, int lumFilterSize, +const int16_t *chrFilter, const int16_t **chrUSrcx, +const int16_t **chrVSrcx, int chrFilterSize, +const int16_t **alpSrcx, uint8_t *dest, +int dstW, int y) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); +int i; +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; +int hasAlpha = alpha && alpSrcx; +int pixelStep = alpha ? 
4 : 3; +uint32_t *dest32 = (uint32_t*)dest; +const int32_t **lumSrc = (const int32_t**)lumSrcx; +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; +const int32_t **alpSrc = (const int32_t**)alpSrcx; +static const float float_mult = 1.0f / 65535.0f; +uint32_t a = av_float2int(1.0f); + +for (i = 0; i < dstW; i++) { +int j; +int Y = -0x4000; +int U = -(128 << 23); +int V = -(128 << 23); +int R, G, B, A; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; +} + +Y >>= 14; +Y += 0x1; +U >>= 14; +V >>= 14; + +if (hasAlpha) { +A = -0x4000; + +for (j = 0; j < lumFilterSize; j++) +A += alpSrc[j][i] * (unsigned)lumFilter[j]; + +A >>= 1; +A += 0x20002000; +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); +} + +Y -= c->yuv2rgb_y_offset; +Y *= c->yuv2rgb_y_coeff; +Y += (1 << 13) - (1 << 29); +R = V * c->yuv2rgb_v2r_coeff; +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; +B =U * c->yuv2rgb_u2b_coeff; + +R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16); +G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16); +B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16); + +dest32[0] = av_float2int(float_mult * (float)R); +dest32[1] = av_float2int(float_mult * (float)G); +dest32[2] = av_float2int(float_mult * (float)B); +if (alpha) +dest32[3] = a; + +dest32 += pixelStep; +} +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { +dest32 = (uint32_t*)dest; +for (i = 0; i < dstW; i++) { +dest32[0] = av_bswap32(dest32[0]); +dest32[1] = av_bswap32(dest32[1]); +dest32[2] = av_bswap32(dest32[2]); +if (alpha) +dest32[3] = av_bswap32(dest32[3]); + +dest32 += pixelStep; +} +} + +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2983,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, 
} break; +case AV_PIX_FMT_RGBF32LE: +case AV_PIX_FMT_RGBF32BE: +case AV_PIX_FMT_RGBAF32LE: +case AV_PIX_FMT_RGBAF32BE: +*yuv2packedX = yuv2rgbaf32_full_X_c; +break; case AV_PIX_FMT_RGB24:
[FFmpeg-devel] [PATCH v3 3/4] avfilter/vf_transpose: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_transpose.c | 44 ++ 1 file changed, 44 insertions(+) diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c index 469e66729f..1023d6fe82 100644 --- a/libavfilter/vf_transpose.c +++ b/libavfilter/vf_transpose.c @@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t src_linesize, transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8); } +static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize, +int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 12) { +for (x = 0; x < w; x++) { +*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + x*src_linesize)); +*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + x*src_linesize)); +*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + + +static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize, + int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 16) { +for (x = 0; x < w; x++) { +*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + x*src_linesize)); +*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + static int config_props_output(AVFilterLink *outlink) { AVFilterContext *ctx = outlink->src; @@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink) v->transpose_8x8 = transpose_8x8_48_c; break; case 8: v->transpose_block = transpose_block_64_c; v->transpose_8x8 = transpose_8x8_64_c; break; +case 12: v->transpose_block = 
transpose_block_96_c; + v->transpose_8x8 = transpose_8x8_96_c; break; +case 16: v->transpose_block = transpose_block_128_c; + v->transpose_8x8 = transpose_8x8_128_c; break; } } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v3 2/4] avfilter/vf_hflip: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_hflip_init.h | 25 + 1 file changed, 25 insertions(+) diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h index d0319f463d..31173f73fc 100644 --- a/libavfilter/vf_hflip_init.h +++ b/libavfilter/vf_hflip_init.h @@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t *ddst, int w) dst[j] = src[-j]; } +static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint32_t *in = (const uint32_t *)ssrc; +uint32_t *out = (uint32_t *)ddst; + +for (int j = 0; j < w; j++, out += 3, in -= 3) { +out[0] = in[0]; +out[1] = in[1]; +out[2] = in[2]; +} +} + +static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint64_t *in = (const uint64_t *)ssrc; +uint64_t *out = (uint64_t *)ddst; + +for (int j = 0; j < w; j++, out += 2, in -= 2) { +out[0] = in[0]; +out[1] = in[1]; +} +} + static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) { for (int i = 0; i < nb_planes; i++) { @@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) case 4: s->flip_line[i] = hflip_dword_c; break; case 6: s->flip_line[i] = hflip_b48_c; break; case 8: s->flip_line[i] = hflip_qword_c; break; +case 12: s->flip_line[i] = hflip_b96_c; break; +case 16: s->flip_line[i] = hflip_b128_c; break; default: return AVERROR_BUG; } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v3 1/4] swscale/input: add rgbaf32 input support
From: Mark Reid --- libswscale/input.c | 172 + libswscale/utils.c | 4 ++ 2 files changed, 176 insertions(+) diff --git a/libswscale/input.c b/libswscale/input.c index 7ff7bfaa01..4683284b0b 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, cons rgbaf16_funcs_endian(le, 0) rgbaf16_funcs_endian(be, 1) +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): av_int2float(AV_RL32(&src))) + +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const float *src, int width, + int32_t *rgb2yuv, int comp) +{ +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), 0.0f, 65535.0f))) >> 1; +int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), 0.0f, 65535.0f))) >> 1; +int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), 0.0f, 65535.0f))) >> 1; + +dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, +const float *src, int width, +int32_t *rgb2yuv, int comp) +{ +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = 
lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float *src, int is_be, + int width, int32_t *rgb2yuv, int comp) +{ +int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float *src, int is_be, + int width, void *opq) +{ +int i; +for (i=0; isrcFormat; @@ -1570,6 +1700,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_half_c; break; +case AV_PIX_FMT_RGBF32BE: +c->chrToYV12 = rgbf32beToUV_half_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->chrToYV12 = rgbaf32beToUV_half_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->chrToYV12 = rgbf32leToUV_half_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->chrToYV12 = rgbaf32leToUV_half_c; +break; } } else { switch (srcFormat) { @@ -1663,6 +1805,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_c; break; +case AV_PIX_FMT_RGBF32BE: +c->chrToYV12 = rgbf32beToUV_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->chrToYV12 = rgbaf32beToUV_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->chrToYV12 = rgbf32leToUV_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->chrToYV12 = rgbaf32leToUV_c; +break; } } @@ -1973,6 +2127,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->lumToYV12 = rgbaf16leToY_c; break; +case 
AV_PIX_FMT_RGBF32BE: +
[FFmpeg-devel] [PATCH v3 0/4] swscale rgbaf32 input/output support
From: Mark Reid This patch series adds swscale input/output support for the packed rgb float formats. A few of the filters also needed to support the larger 96/128 bit packed pixel sizes. I also plan to eventually add lossless unscaled conversions between the planar and packed formats. changes since v2 * add bias to rgbaf32 output to improve non overflowing range changes since v1 * output correct alpha if src doesn't have alpha Mark Reid (4): swscale/input: add rgbaf32 input support avfilter/vf_hflip: add support for packed rgb float formats avfilter/vf_transpose: add support for packed rgb float formats swscale/output: add rgbaf32 output support libavfilter/vf_hflip_init.h | 25 libavfilter/vf_transpose.c | 44 ++ libswscale/input.c | 172 +++ libswscale/output.c | 92 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 12 +- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 + tests/ref/fate/filter-pixfmts-crop | 4 + tests/ref/fate/filter-pixfmts-field | 4 + tests/ref/fate/filter-pixfmts-fieldorder | 4 + tests/ref/fate/filter-pixfmts-hflip | 4 + tests/ref/fate/filter-pixfmts-il | 4 + tests/ref/fate/filter-pixfmts-null | 4 + tests/ref/fate/filter-pixfmts-scale | 4 + tests/ref/fate/filter-pixfmts-transpose | 4 + tests/ref/fate/filter-pixfmts-vflip | 4 + tests/ref/fate/sws-floatimg-cmp | 16 +++ 23 files changed, 411 insertions(+), 4 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or 
email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 4/4] swscale/output: add rgbaf32 output support
From: Mark Reid --- libswscale/output.c | 92 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 16 +++-- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ tests/ref/fate/sws-floatimg-cmp | 16 + 20 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le diff --git a/libswscale/output.c b/libswscale/output.c index 0e1c1225a0..e2ec9cbdf5 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2474,6 +2474,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, +const int16_t **lumSrcx, int lumFilterSize, +const int16_t *chrFilter, const int16_t **chrUSrcx, +const int16_t **chrVSrcx, int chrFilterSize, +const int16_t **alpSrcx, uint8_t *dest, +int dstW, int y) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); +int i; +int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; +int hasAlpha = alpha && alpSrcx; +int pixelStep = alpha ? 
4 : 3; +uint32_t *dest32 = (uint32_t*)dest; +const int32_t **lumSrc = (const int32_t**)lumSrcx; +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; +const int32_t **alpSrc = (const int32_t**)alpSrcx; +static const float float_mult = 1.0f / 65535.0f; +uint32_t a = av_float2int(1.0f); + +for (i = 0; i < dstW; i++) { +int j; +int Y = -0x4000; +int U = -(128 << 23); +int V = -(128 << 23); +int R, G, B, A; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; +} + +Y >>= 14; +Y += 0x1; +U >>= 14; +V >>= 14; + +if (hasAlpha) { +A = -0x4000; + +for (j = 0; j < lumFilterSize; j++) +A += alpSrc[j][i] * (unsigned)lumFilter[j]; + +A >>= 1; +A += 0x20002000; +a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); +} + +Y -= c->yuv2rgb_y_offset; +Y *= c->yuv2rgb_y_coeff; +Y += 1 << 13; +R = V * c->yuv2rgb_v2r_coeff; +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; +B =U * c->yuv2rgb_u2b_coeff; + +R = av_clip_uintp2(Y + R, 30); +G = av_clip_uintp2(Y + G, 30); +B = av_clip_uintp2(Y + B, 30); + +dest32[0] = av_float2int(float_mult * (float)(R >> 14)); +dest32[1] = av_float2int(float_mult * (float)(G >> 14)); +dest32[2] = av_float2int(float_mult * (float)(B >> 14)); +if (alpha) +dest32[3] = a; + +dest32 += pixelStep; +} +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { +dest32 = (uint32_t*)dest; +for (i = 0; i < dstW; i++) { +dest32[0] = av_bswap32(dest32[0]); +dest32[1] = av_bswap32(dest32[1]); +dest32[2] = av_bswap32(dest32[2]); +if (alpha) +dest32[3] = av_bswap32(dest32[3]); + +dest32 += pixelStep; +} +} + +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2986,6 +3072,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, } break; +case AV_PIX_FMT_RGBF32LE: +case 
AV_PIX_FMT_RGBF32BE: +case AV_PIX_FMT_RGBAF32LE: +case AV_PIX_FMT_RGBAF32BE: +*yuv2packedX = yuv2rgbaf32_full_X_c; +break; case AV_PIX_FMT_RGB24: *yuv2packedX = yuv2rgb24_full_X_c;
[FFmpeg-devel] [PATCH v2 3/4] avfilter/vf_transpose: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_transpose.c | 44 ++ 1 file changed, 44 insertions(+) diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c index 469e66729f..1023d6fe82 100644 --- a/libavfilter/vf_transpose.c +++ b/libavfilter/vf_transpose.c @@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t src_linesize, transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8); } +static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize, +int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 12) { +for (x = 0; x < w; x++) { +*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + x*src_linesize)); +*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + x*src_linesize)); +*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + + +static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize, + int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 16) { +for (x = 0; x < w; x++) { +*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + x*src_linesize)); +*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + static int config_props_output(AVFilterLink *outlink) { AVFilterContext *ctx = outlink->src; @@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink) v->transpose_8x8 = transpose_8x8_48_c; break; case 8: v->transpose_block = transpose_block_64_c; v->transpose_8x8 = transpose_8x8_64_c; break; +case 12: v->transpose_block = 
transpose_block_96_c; + v->transpose_8x8 = transpose_8x8_96_c; break; +case 16: v->transpose_block = transpose_block_128_c; + v->transpose_8x8 = transpose_8x8_128_c; break; } } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 2/4] avfilter/vf_hflip: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_hflip_init.h | 25 + 1 file changed, 25 insertions(+) diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h index d0319f463d..31173f73fc 100644 --- a/libavfilter/vf_hflip_init.h +++ b/libavfilter/vf_hflip_init.h @@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t *ddst, int w) dst[j] = src[-j]; } +static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint32_t *in = (const uint32_t *)ssrc; +uint32_t *out = (uint32_t *)ddst; + +for (int j = 0; j < w; j++, out += 3, in -= 3) { +out[0] = in[0]; +out[1] = in[1]; +out[2] = in[2]; +} +} + +static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint64_t *in = (const uint64_t *)ssrc; +uint64_t *out = (uint64_t *)ddst; + +for (int j = 0; j < w; j++, out += 2, in -= 2) { +out[0] = in[0]; +out[1] = in[1]; +} +} + static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) { for (int i = 0; i < nb_planes; i++) { @@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) case 4: s->flip_line[i] = hflip_dword_c; break; case 6: s->flip_line[i] = hflip_b48_c; break; case 8: s->flip_line[i] = hflip_qword_c; break; +case 12: s->flip_line[i] = hflip_b96_c; break; +case 16: s->flip_line[i] = hflip_b128_c; break; default: return AVERROR_BUG; } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 1/4] swscale/input: add rgbaf32 input support
From: Mark Reid --- libswscale/input.c | 172 + libswscale/utils.c | 4 ++ 2 files changed, 176 insertions(+) diff --git a/libswscale/input.c b/libswscale/input.c index 7ff7bfaa01..4683284b0b 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, cons rgbaf16_funcs_endian(le, 0) rgbaf16_funcs_endian(be, 1) +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): av_int2float(AV_RL32(&src))) + +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const float *src, int width, + int32_t *rgb2yuv, int comp) +{ +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), 0.0f, 65535.0f))) >> 1; +int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), 0.0f, 65535.0f))) >> 1; +int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), 0.0f, 65535.0f))) >> 1; + +dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, +const float *src, int width, +int32_t *rgb2yuv, int comp) +{ +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = 
lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float *src, int is_be, + int width, int32_t *rgb2yuv, int comp) +{ +int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float *src, int is_be, + int width, void *opq) +{ +int i; +for (i=0; isrcFormat; @@ -1570,6 +1700,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_half_c; break; +case AV_PIX_FMT_RGBF32BE: +c->chrToYV12 = rgbf32beToUV_half_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->chrToYV12 = rgbaf32beToUV_half_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->chrToYV12 = rgbf32leToUV_half_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->chrToYV12 = rgbaf32leToUV_half_c; +break; } } else { switch (srcFormat) { @@ -1663,6 +1805,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_c; break; +case AV_PIX_FMT_RGBF32BE: +c->chrToYV12 = rgbf32beToUV_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->chrToYV12 = rgbaf32beToUV_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->chrToYV12 = rgbf32leToUV_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->chrToYV12 = rgbaf32leToUV_c; +break; } } @@ -1973,6 +2127,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->lumToYV12 = rgbaf16leToY_c; break; +case 
AV_PIX_FMT_RGBF32BE: +
[FFmpeg-devel] [PATCH v2 0/4] swscale rgbaf32 input/output support
From: Mark Reid This patch series adds swscale input/output support for the packed rgb float formats. A few of the filters also needed to support the larger 96/128 bit packed pixel sizes. I also plan to eventually add lossless unscaled conversions between the planar and packed formats. changes since v1 * output correct alpha if src doesn't have alpha Mark Reid (4): swscale/input: add rgbaf32 input support avfilter/vf_hflip: add support for packed rgb float formats avfilter/vf_transpose: add support for packed rgb float formats swscale/output: add rgbaf32 output support libavfilter/vf_hflip_init.h | 25 libavfilter/vf_transpose.c | 44 ++ libswscale/input.c | 172 +++ libswscale/output.c | 92 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 12 +- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 + tests/ref/fate/filter-pixfmts-crop | 4 + tests/ref/fate/filter-pixfmts-field | 4 + tests/ref/fate/filter-pixfmts-fieldorder | 4 + tests/ref/fate/filter-pixfmts-hflip | 4 + tests/ref/fate/filter-pixfmts-il | 4 + tests/ref/fate/filter-pixfmts-null | 4 + tests/ref/fate/filter-pixfmts-scale | 4 + tests/ref/fate/filter-pixfmts-transpose | 4 + tests/ref/fate/filter-pixfmts-vflip | 4 + tests/ref/fate/sws-floatimg-cmp | 16 +++ 23 files changed, 411 insertions(+), 4 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] avcodec/tiff: add read support for compressed rgb floating point formats
From: Mark Reid floating point uses a slightly different predictor technique described here http://chriscox.org/TIFFTN3d1.pdf Here is a link to the test files, if someone could add them to fate for me https://www.dropbox.com/s/fg59h2os4gb4wug/tiff_fate_samples.zip --- libavcodec/tiff.c | 68 ++ tests/fate/image.mak | 20 ++- tests/ref/fate/tiff-lzw-rgbaf32le | 6 ++ tests/ref/fate/tiff-lzw-rgbf32le | 6 ++ tests/ref/fate/tiff-uncompressed-rgbaf32le | 6 ++ tests/ref/fate/tiff-uncompressed-rgbf32le | 6 ++ tests/ref/fate/tiff-zip-rgbaf32le | 6 ++ tests/ref/fate/tiff-zip-rgbf32le | 6 ++ 8 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 tests/ref/fate/tiff-lzw-rgbaf32le create mode 100644 tests/ref/fate/tiff-lzw-rgbf32le create mode 100644 tests/ref/fate/tiff-uncompressed-rgbaf32le create mode 100644 tests/ref/fate/tiff-uncompressed-rgbf32le create mode 100644 tests/ref/fate/tiff-zip-rgbaf32le create mode 100644 tests/ref/fate/tiff-zip-rgbf32le diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c index 3a610ada85..ce31a40e37 100644 --- a/libavcodec/tiff.c +++ b/libavcodec/tiff.c @@ -2249,6 +2249,74 @@ again: } } +/* Floating point predictor + TIFF Technical Note 3 http://chriscox.org/TIFFTN3d1.pdf */ +if (s->predictor == 3) { +int channels = s->bppcount; +int group_size; +uint8_t *tmpbuf; +int bpc; + +dst = five_planes ? 
five_planes : p->data[plane]; +soff = s->bpp >> 3; +if (s->planar) { +soff = FFMAX(soff / s->bppcount, 1); +channels = 1; +} +ssize = s->width * soff; +bpc = FFMAX(soff / s->bppcount, 1); /* Bytes per component */ +group_size = s->width * channels; + +tmpbuf = av_malloc(ssize); +if (!tmpbuf) +return AVERROR(ENOMEM); + +if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32LE || +s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32LE) { +for (i = 0; i < decoded_height; i++) { +/* Copy first sample byte for each channel */ +for (j = 0; j < channels; j++) +tmpbuf[j] = dst[j]; + +/* Decode horizontal differences */ +for (j = channels; j < ssize; j++) +tmpbuf[j] = dst[j] + tmpbuf[j-channels]; + +/* Combine shuffled bytes from their separate groups. Each + byte of every floating point value in a row of pixels is + split and combined into separate groups. A group of all + the sign/exponents bytes in the row and groups for each + of the upper, mid, and lower mantissa bytes in the row. */ +for (j = 0; j < group_size; j++) { +for (int k = 0; k < bpc; k++) { +dst[bpc * j + k] = tmpbuf[(bpc - k - 1) * group_size + j]; +} +} +dst += stride; +} +} else if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32BE || + s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32BE) { +/* Same as LE only the shuffle at the end is reversed */ +for (i = 0; i < decoded_height; i++) { +for (j = 0; j < channels; j++) +tmpbuf[j] = dst[j]; + +for (j = channels; j < ssize; j++) +tmpbuf[j] = dst[j] + tmpbuf[j-channels]; + +for (j = 0; j < group_size; j++) { +for (int k = 0; k < bpc; k++) { +dst[bpc * j + k] = tmpbuf[k * group_size + j]; +} +} +dst += stride; +} +} else { +av_log(s->avctx, AV_LOG_ERROR, "unsupported floating point pixel format\n"); +} +av_free(tmpbuf); +} + if (s->photometric == TIFF_PHOTOMETRIC_WHITE_IS_ZERO) { int c = (s->avctx->pix_fmt == AV_PIX_FMT_PAL8 ? 
(1data[plane]; diff --git a/tests/fate/image.mak b/tests/fate/image.mak index 03e794dc48..167c8ccf2c 100644 --- a/tests/fate/image.mak +++ b/tests/fate/image.mak @@ -501,7 +501,25 @@ fate-tiff-fax-g3: CMD = framecrc -i $(TARGET_SAMPLES)/CCITT_fax/G31D.TIF FATE_TIFF += fate-tiff-fax-g3s fate-tiff-fax-g3s: CMD = framecrc -i $(TARGET_SAMPLES)/CCITT_fax/G31DS.TIF -FATE_TIFF-$(call DEMDEC, IMAGE2, TIFF) += $(FATE_TIFF) +FATE_TIFF += fate-tiff-uncompressed-rgbf32le +fate-tiff-uncompressed-rgbf32le: CMD = framecrc -i $(TARGET_SAMPLES)/tiff/uncompressed_rgbf32le.tif + +FATE_
[FFmpeg-devel] [PATCH] avcodec/tiff: add support for decoding compressed rgb floating point formats
From: Mark Reid floating point uses a slightly different predictor technique described here http://chriscox.org/TIFFTN3d1.pdf Here is a link to the test files, if someone could add them to fate for me https://www.dropbox.com/s/fg59h2os4gb4wug/tiff_fate_samples.zip --- libavcodec/tiff.c | 66 +- tests/fate/image.mak | 18 ++ tests/ref/fate/tiff-lzw-rgbaf32le | 6 ++ tests/ref/fate/tiff-lzw-rgbf32le | 6 ++ tests/ref/fate/tiff-uncompressed-rgbaf32le | 6 ++ tests/ref/fate/tiff-uncompressed-rgbf32le | 6 ++ tests/ref/fate/tiff-zip-rgbaf32le | 6 ++ tests/ref/fate/tiff-zip-rgbf32le | 6 ++ 8 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 tests/ref/fate/tiff-lzw-rgbaf32le create mode 100644 tests/ref/fate/tiff-lzw-rgbf32le create mode 100644 tests/ref/fate/tiff-uncompressed-rgbaf32le create mode 100644 tests/ref/fate/tiff-uncompressed-rgbf32le create mode 100644 tests/ref/fate/tiff-zip-rgbaf32le create mode 100644 tests/ref/fate/tiff-zip-rgbf32le diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c index 3a610ada85..c1d07f8c3e 100644 --- a/libavcodec/tiff.c +++ b/libavcodec/tiff.c @@ -1913,7 +1913,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p, TiffContext *const s = avctx->priv_data; unsigned off, last_off = 0; int le, ret, plane, planes; -int i, j, entries, stride; +int i, j, k, entries, stride; unsigned soff, ssize; uint8_t *dst; GetByteContext stripsizes; @@ -2249,6 +2249,70 @@ again: } } +/* Floating point predictor +TIFF Technical Note 3 http://chriscox.org/TIFFTN3d1.pdf */ +if (s->predictor == 3) { +int channels = s->bppcount; +int lane_offset; +uint8_t *tmpbuf; +int bpc; + +dst = five_planes ? 
five_planes : p->data[plane]; +soff = s->bpp >> 3; +if (s->planar) { +soff = FFMAX(soff / s->bppcount, 1); +channels = 1; +} +ssize = s->width * soff; +bpc = FFMAX(soff / s->bppcount, 1); /* bytes per component */ +lane_offset = s->width * channels; + +tmpbuf = (uint8_t*)av_malloc(ssize); +if (!tmpbuf) +return AVERROR(ENOMEM); + +if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32LE || +s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32LE) { +for (i = 0; i < decoded_height; i++) { +/* copy first sample byte for each channel */ +for (j = 0; j < channels; j++) +tmpbuf[j] = dst[j]; + +/* decode horizontal differences */ +for (j = channels; j < ssize; j++) +tmpbuf[j] = dst[j] + tmpbuf[j-channels]; + +/* combine shuffled bytes from their sepearate lanes */ +for (j = 0; j < lane_offset; j++) { +for (k = 0; k < bpc; k++) { +dst[bpc * j + k] = tmpbuf[(bpc - k - 1) * lane_offset + j]; +} +} +dst += stride; +} +} else if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32BE || + s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32BE) { +/* same as LE only the shuffle at the end is reversed */ +for (i = 0; i < decoded_height; i++) { +for (j = 0; j < channels; j++) +tmpbuf[j] = dst[j]; + +for (j = channels; j < ssize; j++) +tmpbuf[j] = dst[j] + tmpbuf[j-channels]; + +for (j = 0; j < lane_offset; j++) { +for (k = 0; k < bpc; k++) { +dst[bpc * j + k] = tmpbuf[k * lane_offset + j]; +} +} +dst += stride; +} +} else { +av_log(s->avctx, AV_LOG_ERROR, "unsupported floating point pixel format\n"); +} +av_free(tmpbuf); +} + if (s->photometric == TIFF_PHOTOMETRIC_WHITE_IS_ZERO) { int c = (s->avctx->pix_fmt == AV_PIX_FMT_PAL8 ? 
(1data[plane]; diff --git a/tests/fate/image.mak b/tests/fate/image.mak index 03e794dc48..971531520d 100644 --- a/tests/fate/image.mak +++ b/tests/fate/image.mak @@ -501,6 +501,24 @@ fate-tiff-fax-g3: CMD = framecrc -i $(TARGET_SAMPLES)/CCITT_fax/G31D.TIF FATE_TIFF += fate-tiff-fax-g3s fate-tiff-fax-g3s: CMD = framecrc -i $(TARGET_SAMPLES)/CCITT_fax/G31DS.TIF +FATE_TIFF += fate-tiff-uncompressed-rgbf32le +fate-tiff-uncompressed-rgbf32le: CMD = framecrc -i $(TARGET_SAMPLES)/tiff/uncompressed_rgbf32le.tif + +FATE_TIFF += fate-tiff-uncompressed-rgbaf32le +
[FFmpeg-devel] [PATCH 4/4] swscale/output: add rgbaf32 output support
From: Mark Reid --- libswscale/output.c | 89 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 16 +++-- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ tests/ref/fate/sws-floatimg-cmp | 16 + 20 files changed, 167 insertions(+), 8 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le diff --git a/libswscale/output.c b/libswscale/output.c index 0e1c1225a0..b3e064ae8d 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2474,6 +2474,89 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, +const int16_t **lumSrcx, int lumFilterSize, +const int16_t *chrFilter, const int16_t **chrUSrcx, +const int16_t **chrVSrcx, int chrFilterSize, +const int16_t **alpSrcx, uint8_t *dest, +int dstW, int y) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); +int i; +int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx; +int pixelStep = desc->flags & AV_PIX_FMT_FLAG_ALPHA ? 
4 : 3; +uint32_t *dest32 = (uint32_t*)dest; +const int32_t **lumSrc = (const int32_t**)lumSrcx; +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; +const int32_t **alpSrc = (const int32_t**)alpSrcx; +static const float float_mult = 1.0f / 65535.0f; + +for (i = 0; i < dstW; i++) { +int j; +int Y = -0x4000; +int U = -(128 << 23); +int V = -(128 << 23); +int R, G, B, A; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; +} + +Y >>= 14; +Y += 0x1; +U >>= 14; +V >>= 14; + +if (hasAlpha) { +A = -0x4000; + +for (j = 0; j < lumFilterSize; j++) +A += alpSrc[j][i] * (unsigned)lumFilter[j]; + +A >>= 1; +A += 0x20002000; +} + +Y -= c->yuv2rgb_y_offset; +Y *= c->yuv2rgb_y_coeff; +Y += 1 << 13; +R = V * c->yuv2rgb_v2r_coeff; +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; +B =U * c->yuv2rgb_u2b_coeff; + +R = av_clip_uintp2(Y + R, 30); +G = av_clip_uintp2(Y + G, 30); +B = av_clip_uintp2(Y + B, 30); + +dest32[0] = av_float2int(float_mult * (float)(R >> 14)); +dest32[1] = av_float2int(float_mult * (float)(G >> 14)); +dest32[2] = av_float2int(float_mult * (float)(B >> 14)); +if (hasAlpha) +dest32[3] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); + +dest32 += pixelStep; +} +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { +dest32 = (uint32_t*)dest; +for (i = 0; i < dstW; i++) { +dest32[0] = av_bswap32(dest32[0]); +dest32[1] = av_bswap32(dest32[1]); +dest32[2] = av_bswap32(dest32[2]); +if (hasAlpha) +dest32[3] = av_bswap32(dest32[3]); + +dest32 += pixelStep; +} +} + +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2986,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, } break; +case AV_PIX_FMT_RGBF32LE: +case AV_PIX_FMT_RGBF32BE: +case 
AV_PIX_FMT_RGBAF32LE: +case AV_PIX_FMT_RGBAF32BE: +*yuv2packedX = yuv2rgbaf32_full_X_c; +break; case AV_PIX_FMT_RGB24: *yuv2packedX = yuv2rgb24_full_X_c; *yuv2packed2 = yuv2rgb24_full_2_c;
[FFmpeg-devel] [PATCH 3/4] avfilter/vf_transpose: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_transpose.c | 44 ++ 1 file changed, 44 insertions(+) diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c index 469e66729f..1023d6fe82 100644 --- a/libavfilter/vf_transpose.c +++ b/libavfilter/vf_transpose.c @@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t src_linesize, transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8); } +static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize, +int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 12) { +for (x = 0; x < w; x++) { +*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + x*src_linesize)); +*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + x*src_linesize)); +*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + + +static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize, + uint8_t *dst, ptrdiff_t dst_linesize, + int w, int h) +{ +int x, y; +for (y = 0; y < h; y++, dst += dst_linesize, src += 16) { +for (x = 0; x < w; x++) { +*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + x*src_linesize)); +*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + x*src_linesize)); +} +} +} + +static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize, +uint8_t *dst, ptrdiff_t dst_linesize) +{ +transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8); +} + static int config_props_output(AVFilterLink *outlink) { AVFilterContext *ctx = outlink->src; @@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink) v->transpose_8x8 = transpose_8x8_48_c; break; case 8: v->transpose_block = transpose_block_64_c; v->transpose_8x8 = transpose_8x8_64_c; break; +case 12: v->transpose_block = 
transpose_block_96_c; + v->transpose_8x8 = transpose_8x8_96_c; break; +case 16: v->transpose_block = transpose_block_128_c; + v->transpose_8x8 = transpose_8x8_128_c; break; } } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/4] avfilter/vf_hflip: add support for packed rgb float formats
From: Mark Reid --- libavfilter/vf_hflip_init.h | 25 + 1 file changed, 25 insertions(+) diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h index d0319f463d..31173f73fc 100644 --- a/libavfilter/vf_hflip_init.h +++ b/libavfilter/vf_hflip_init.h @@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t *ddst, int w) dst[j] = src[-j]; } +static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint32_t *in = (const uint32_t *)ssrc; +uint32_t *out = (uint32_t *)ddst; + +for (int j = 0; j < w; j++, out += 3, in -= 3) { +out[0] = in[0]; +out[1] = in[1]; +out[2] = in[2]; +} +} + +static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w) +{ +const uint64_t *in = (const uint64_t *)ssrc; +uint64_t *out = (uint64_t *)ddst; + +for (int j = 0; j < w; j++, out += 2, in -= 2) { +out[0] = in[0]; +out[1] = in[1]; +} +} + static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) { for (int i = 0; i < nb_planes; i++) { @@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes) case 4: s->flip_line[i] = hflip_dword_c; break; case 6: s->flip_line[i] = hflip_b48_c; break; case 8: s->flip_line[i] = hflip_qword_c; break; +case 12: s->flip_line[i] = hflip_b96_c; break; +case 16: s->flip_line[i] = hflip_b128_c; break; default: return AVERROR_BUG; } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/4] swscale/input: add rgbaf32 input support
From: Mark Reid --- libswscale/input.c | 172 + libswscale/utils.c | 4 ++ 2 files changed, 176 insertions(+) diff --git a/libswscale/input.c b/libswscale/input.c index 7ff7bfaa01..4683284b0b 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, cons rgbaf16_funcs_endian(le, 0) rgbaf16_funcs_endian(be, 1) +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): av_int2float(AV_RL32(&src))) + +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const float *src, int width, + int32_t *rgb2yuv, int comp) +{ +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), 0.0f, 65535.0f))) >> 1; +int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), 0.0f, 65535.0f))) >> 1; +int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), 0.0f, 65535.0f))) >> 1; + +dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, +const float *src, int width, +int32_t *rgb2yuv, int comp) +{ +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = 
lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float *src, int is_be, + int width, int32_t *rgb2yuv, int comp) +{ +int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +int i; +for (i = 0; i < width; i++) { +int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); +int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); +int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + +dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; +} +} + +static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float *src, int is_be, + int width, void *opq) +{ +int i; +for (i=0; isrcFormat; @@ -1570,6 +1700,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_half_c; break; +case AV_PIX_FMT_RGBF32BE: +c->chrToYV12 = rgbf32beToUV_half_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->chrToYV12 = rgbaf32beToUV_half_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->chrToYV12 = rgbf32leToUV_half_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->chrToYV12 = rgbaf32leToUV_half_c; +break; } } else { switch (srcFormat) { @@ -1663,6 +1805,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_c; break; +case AV_PIX_FMT_RGBF32BE: +c->chrToYV12 = rgbf32beToUV_c; +break; +case AV_PIX_FMT_RGBAF32BE: +c->chrToYV12 = rgbaf32beToUV_c; +break; +case AV_PIX_FMT_RGBF32LE: +c->chrToYV12 = rgbf32leToUV_c; +break; +case AV_PIX_FMT_RGBAF32LE: +c->chrToYV12 = rgbaf32leToUV_c; +break; } } @@ -1973,6 +2127,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->lumToYV12 = rgbaf16leToY_c; break; +case 
AV_PIX_FMT_RGBF32BE: +
[FFmpeg-devel] [PATCH 0/4] swscale rgbaf32 input/output support
From: Mark Reid This patch series adds swscale input/output support for the packed rgb float formats. A few of the filters also needed to support the larger 96/128 bit packed pixel sizes. I also plan to eventually add lossless unscaled conversions between the planar and packed formats. Mark Reid (4): swscale/input: add rgbaf32 input support avfilter/vf_hflip: add support for packed rgb float formats avfilter/vf_transpose: add support for packed rgb float formats swscale/output: add rgbaf32 output support libavfilter/vf_hflip_init.h | 25 libavfilter/vf_transpose.c | 44 ++ libswscale/input.c | 172 +++ libswscale/output.c | 89 libswscale/swscale_unscaled.c| 4 +- libswscale/tests/floatimg_cmp.c | 4 +- libswscale/utils.c | 12 +- libswscale/yuv2rgb.c | 2 + tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + tests/ref/fate/filter-pixdesc-rgbf32be | 1 + tests/ref/fate/filter-pixdesc-rgbf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 + tests/ref/fate/filter-pixfmts-crop | 4 + tests/ref/fate/filter-pixfmts-field | 4 + tests/ref/fate/filter-pixfmts-fieldorder | 4 + tests/ref/fate/filter-pixfmts-hflip | 4 + tests/ref/fate/filter-pixfmts-il | 4 + tests/ref/fate/filter-pixfmts-null | 4 + tests/ref/fate/filter-pixfmts-scale | 4 + tests/ref/fate/filter-pixfmts-transpose | 4 + tests/ref/fate/filter-pixfmts-vflip | 4 + tests/ref/fate/sws-floatimg-cmp | 16 +++ 23 files changed, 408 insertions(+), 4 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v3 2/2] swscale/x86/input.asm: add x86-optimized planar rgb2yuv functions
From: Mark Reid sse2 only operates on 2 lanes per loop for to_y and to_uv functions, due to the lack of a pmulld instruction. Emulating pmulld with 2 pmuludq and shuffles proved too costly and made to_uv functions slower than the c implementation. For to_y on sse2 only float functions are generated; I was not able to outperform the c implementation on the integer pixel formats. For to_a on sse4 only the float functions are generated. sse2 and sse4 generated nearly identically performing code on integer pixel formats, so only sse2/avx2 versions are generated. planar_gbrp_to_y_512_c: 1197.5 planar_gbrp_to_y_512_sse4: 444.5 planar_gbrp_to_y_512_avx2: 287.5 planar_gbrap_to_y_512_c: 1204.5 planar_gbrap_to_y_512_sse4: 447.5 planar_gbrap_to_y_512_avx2: 289.5 planar_gbrp9be_to_y_512_c: 1380.0 planar_gbrp9be_to_y_512_sse4: 543.5 planar_gbrp9be_to_y_512_avx2: 340.0 planar_gbrp9le_to_y_512_c: 1200.5 planar_gbrp9le_to_y_512_sse4: 442.0 planar_gbrp9le_to_y_512_avx2: 282.0 planar_gbrp10be_to_y_512_c: 1378.5 planar_gbrp10be_to_y_512_sse4: 544.0 planar_gbrp10be_to_y_512_avx2: 337.5 planar_gbrp10le_to_y_512_c: 1200.0 planar_gbrp10le_to_y_512_sse4: 448.0 planar_gbrp10le_to_y_512_avx2: 285.5 planar_gbrap10be_to_y_512_c: 1380.0 planar_gbrap10be_to_y_512_sse4: 542.0 planar_gbrap10be_to_y_512_avx2: 340.5 planar_gbrap10le_to_y_512_c: 1199.0 planar_gbrap10le_to_y_512_sse4: 446.0 planar_gbrap10le_to_y_512_avx2: 289.5 planar_gbrp12be_to_y_512_c: 10563.0 planar_gbrp12be_to_y_512_sse4: 542.5 planar_gbrp12be_to_y_512_avx2: 339.0 planar_gbrp12le_to_y_512_c: 1201.0 planar_gbrp12le_to_y_512_sse4: 440.5 planar_gbrp12le_to_y_512_avx2: 286.0 planar_gbrap12be_to_y_512_c: 1701.5 planar_gbrap12be_to_y_512_sse4: 917.0 planar_gbrap12be_to_y_512_avx2: 338.5 planar_gbrap12le_to_y_512_c: 1201.0 planar_gbrap12le_to_y_512_sse4: 444.5 planar_gbrap12le_to_y_512_avx2: 288.0 planar_gbrp14be_to_y_512_c: 1370.5 planar_gbrp14be_to_y_512_sse4: 545.0 planar_gbrp14be_to_y_512_avx2: 338.5 planar_gbrp14le_to_y_512_c: 1199.0 
planar_gbrp14le_to_y_512_sse4: 444.0 planar_gbrp14le_to_y_512_avx2: 279.5 planar_gbrp16be_to_y_512_c: 1364.0 planar_gbrp16be_to_y_512_sse4: 544.5 planar_gbrp16be_to_y_512_avx2: 339.5 planar_gbrp16le_to_y_512_c: 1201.0 planar_gbrp16le_to_y_512_sse4: 445.5 planar_gbrp16le_to_y_512_avx2: 280.5 planar_gbrap16be_to_y_512_c: 1377.0 planar_gbrap16be_to_y_512_sse4: 545.0 planar_gbrap16be_to_y_512_avx2: 338.5 planar_gbrap16le_to_y_512_c: 1201.0 planar_gbrap16le_to_y_512_sse4: 442.0 planar_gbrap16le_to_y_512_avx2: 279.0 planar_gbrpf32be_to_y_512_c: 4113.0 planar_gbrpf32be_to_y_512_sse2: 2438.0 planar_gbrpf32be_to_y_512_sse4: 1068.0 planar_gbrpf32be_to_y_512_avx2: 904.5 planar_gbrpf32le_to_y_512_c: 3818.5 planar_gbrpf32le_to_y_512_sse2: 2024.5 planar_gbrpf32le_to_y_512_sse4: 1241.5 planar_gbrpf32le_to_y_512_avx2: 657.0 planar_gbrapf32be_to_y_512_c: 3707.0 planar_gbrapf32be_to_y_512_sse2: 2444.0 planar_gbrapf32be_to_y_512_sse4: 1077.0 planar_gbrapf32be_to_y_512_avx2: 909.0 planar_gbrapf32le_to_y_512_c: 3822.0 planar_gbrapf32le_to_y_512_sse2: 2024.5 planar_gbrapf32le_to_y_512_sse4: 1176.0 planar_gbrapf32le_to_y_512_avx2: 658.5 planar_gbrp_to_uv_512_c: 2325.8 planar_gbrp_to_uv_512_sse2: 1726.8 planar_gbrp_to_uv_512_sse4: 771.8 planar_gbrp_to_uv_512_avx2: 506.8 planar_gbrap_to_uv_512_c: 2281.8 planar_gbrap_to_uv_512_sse2: 1726.3 planar_gbrap_to_uv_512_sse4: 768.3 planar_gbrap_to_uv_512_avx2: 496.3 planar_gbrp9be_to_uv_512_c: 2336.8 planar_gbrp9be_to_uv_512_sse2: 1924.8 planar_gbrp9be_to_uv_512_sse4: 852.3 planar_gbrp9be_to_uv_512_avx2: 552.8 planar_gbrp9le_to_uv_512_c: 2270.3 planar_gbrp9le_to_uv_512_sse2: 1512.3 planar_gbrp9le_to_uv_512_sse4: 764.3 planar_gbrp9le_to_uv_512_avx2: 491.3 planar_gbrp10be_to_uv_512_c: 2281.8 planar_gbrp10be_to_uv_512_sse2: 1917.8 planar_gbrp10be_to_uv_512_sse4: 855.3 planar_gbrp10be_to_uv_512_avx2: 541.3 planar_gbrp10le_to_uv_512_c: 2269.8 planar_gbrp10le_to_uv_512_sse2: 1515.3 planar_gbrp10le_to_uv_512_sse4: 759.8 planar_gbrp10le_to_uv_512_avx2: 
487.8 planar_gbrap10be_to_uv_512_c: 2382.3 planar_gbrap10be_to_uv_512_sse2: 1924.8 planar_gbrap10be_to_uv_512_sse4: 855.3 planar_gbrap10be_to_uv_512_avx2: 540.8 planar_gbrap10le_to_uv_512_c: 2382.3 planar_gbrap10le_to_uv_512_sse2: 1512.3 planar_gbrap10le_to_uv_512_sse4: 759.3 planar_gbrap10le_to_uv_512_avx2: 484.8 planar_gbrp12be_to_uv_512_c: 2283.8 planar_gbrp12be_to_uv_512_sse2: 1936.8 planar_gbrp12be_to_uv_512_sse4: 858.3 planar_gbrp12be_to_uv_512_avx2: 541.3 planar_gbrp12le_to_uv_512_c: 2278.8 planar_gbrp12le_to_uv_512_sse2: 1507.3 planar_gbrp12le_to_uv_512_sse4: 760.3 planar_gbrp12le_to_uv_512_avx2: 485.8 planar_gbrap12be_to_uv_512_c: 2385.3 planar_gbrap12be_to_uv_512_sse2: 1927.8 planar_gbrap12be_to_uv_512_sse4: 855.3 planar_gbrap12be_to_uv_512_avx2: 539.8 planar_gbrap12le_to_uv_512_c: 2377.3 planar_gbrap12le_to_uv_512_sse2: 1516.3 planar_gbrap12le_to_uv_512_sse4: 759.3 planar_gbrap12le_to_uv_512_avx2: 484.8 planar_gbrp14be_to_uv_512_c: 2283.8 planar_gbrp14be_to_uv_512_sse2:
[FFmpeg-devel] [PATCH v3 1/2] swscale/x86/output.asm: add x86-optimized planar gbr yuv2anyX functions
From: Mark Reid changes since v2: * fixed label changes since v1: * remove vex instruction on sse4 path * some load/pack macros use fewer instructions * fixed some typos yuv2gbrp_full_X_4_512_c: 12757.6 yuv2gbrp_full_X_4_512_sse2: 8946.6 yuv2gbrp_full_X_4_512_sse4: 5138.6 yuv2gbrp_full_X_4_512_avx2: 3889.6 yuv2gbrap_full_X_4_512_c: 15368.6 yuv2gbrap_full_X_4_512_sse2: 11916.1 yuv2gbrap_full_X_4_512_sse4: 6294.6 yuv2gbrap_full_X_4_512_avx2: 3477.1 yuv2gbrp9be_full_X_4_512_c: 14381.6 yuv2gbrp9be_full_X_4_512_sse2: 9139.1 yuv2gbrp9be_full_X_4_512_sse4: 5150.1 yuv2gbrp9be_full_X_4_512_avx2: 2834.6 yuv2gbrp9le_full_X_4_512_c: 12990.1 yuv2gbrp9le_full_X_4_512_sse2: 9118.1 yuv2gbrp9le_full_X_4_512_sse4: 5132.1 yuv2gbrp9le_full_X_4_512_avx2: 2833.1 yuv2gbrp10be_full_X_4_512_c: 14401.6 yuv2gbrp10be_full_X_4_512_sse2: 9133.1 yuv2gbrp10be_full_X_4_512_sse4: 5126.1 yuv2gbrp10be_full_X_4_512_avx2: 2837.6 yuv2gbrp10le_full_X_4_512_c: 12718.1 yuv2gbrp10le_full_X_4_512_sse2: 9106.1 yuv2gbrp10le_full_X_4_512_sse4: 5120.1 yuv2gbrp10le_full_X_4_512_avx2: 2826.1 yuv2gbrap10be_full_X_4_512_c: 18535.6 yuv2gbrap10be_full_X_4_512_sse2: 33617.6 yuv2gbrap10be_full_X_4_512_sse4: 6264.1 yuv2gbrap10be_full_X_4_512_avx2: 3422.1 yuv2gbrap10le_full_X_4_512_c: 16724.1 yuv2gbrap10le_full_X_4_512_sse2: 11787.1 yuv2gbrap10le_full_X_4_512_sse4: 6282.1 yuv2gbrap10le_full_X_4_512_avx2: 3441.6 yuv2gbrp12be_full_X_4_512_c: 13723.6 yuv2gbrp12be_full_X_4_512_sse2: 9128.1 yuv2gbrp12be_full_X_4_512_sse4: 7997.6 yuv2gbrp12be_full_X_4_512_avx2: 2844.1 yuv2gbrp12le_full_X_4_512_c: 12257.1 yuv2gbrp12le_full_X_4_512_sse2: 9107.6 yuv2gbrp12le_full_X_4_512_sse4: 5142.6 yuv2gbrp12le_full_X_4_512_avx2: 2837.6 yuv2gbrap12be_full_X_4_512_c: 18511.1 yuv2gbrap12be_full_X_4_512_sse2: 12156.6 yuv2gbrap12be_full_X_4_512_sse4: 6251.1 yuv2gbrap12be_full_X_4_512_avx2: 3444.6 yuv2gbrap12le_full_X_4_512_c: 16687.1 yuv2gbrap12le_full_X_4_512_sse2: 11785.1 yuv2gbrap12le_full_X_4_512_sse4: 6243.6 yuv2gbrap12le_full_X_4_512_avx2: 3446.1 
yuv2gbrp14be_full_X_4_512_c: 13690.6 yuv2gbrp14be_full_X_4_512_sse2: 9120.6 yuv2gbrp14be_full_X_4_512_sse4: 5138.1 yuv2gbrp14be_full_X_4_512_avx2: 2843.1 yuv2gbrp14le_full_X_4_512_c: 14995.6 yuv2gbrp14le_full_X_4_512_sse2: 9119.1 yuv2gbrp14le_full_X_4_512_sse4: 5126.1 yuv2gbrp14le_full_X_4_512_avx2: 2843.1 yuv2gbrp16be_full_X_4_512_c: 12367.1 yuv2gbrp16be_full_X_4_512_sse2: 8233.6 yuv2gbrp16be_full_X_4_512_sse4: 4820.1 yuv2gbrp16be_full_X_4_512_avx2: 2666.6 yuv2gbrp16le_full_X_4_512_c: 10904.1 yuv2gbrp16le_full_X_4_512_sse2: 8214.1 yuv2gbrp16le_full_X_4_512_sse4: 4824.1 yuv2gbrp16le_full_X_4_512_avx2: 2629.1 yuv2gbrap16be_full_X_4_512_c: 26569.6 yuv2gbrap16be_full_X_4_512_sse2: 10884.1 yuv2gbrap16be_full_X_4_512_sse4: 5488.1 yuv2gbrap16be_full_X_4_512_avx2: 3272.1 yuv2gbrap16le_full_X_4_512_c: 14010.1 yuv2gbrap16le_full_X_4_512_sse2: 10562.1 yuv2gbrap16le_full_X_4_512_sse4: 5463.6 yuv2gbrap16le_full_X_4_512_avx2: 3255.1 yuv2gbrpf32be_full_X_4_512_c: 14524.1 yuv2gbrpf32be_full_X_4_512_sse2: 8552.6 yuv2gbrpf32be_full_X_4_512_sse4: 4636.1 yuv2gbrpf32be_full_X_4_512_avx2: 2474.6 yuv2gbrpf32le_full_X_4_512_c: 13060.6 yuv2gbrpf32le_full_X_4_512_sse2: 9682.6 yuv2gbrpf32le_full_X_4_512_sse4: 4298.1 yuv2gbrpf32le_full_X_4_512_avx2: 2453.1 yuv2gbrapf32be_full_X_4_512_c: 18629.6 yuv2gbrapf32be_full_X_4_512_sse2: 11363.1 yuv2gbrapf32be_full_X_4_512_sse4: 15201.6 yuv2gbrapf32be_full_X_4_512_avx2: 3727.1 yuv2gbrapf32le_full_X_4_512_c: 16677.6 yuv2gbrapf32le_full_X_4_512_sse2: 10221.6 yuv2gbrapf32le_full_X_4_512_sse4: 5693.6 yuv2gbrapf32le_full_X_4_512_avx2: 3656.6 --- libswscale/x86/output.asm | 434 +- libswscale/x86/swscale.c | 98 + tests/checkasm/Makefile | 2 +- tests/checkasm/checkasm.c | 1 + tests/checkasm/checkasm.h | 1 + tests/checkasm/sw_gbrp.c | 198 + tests/fate/checkasm.mak | 1 + 7 files changed, 733 insertions(+), 2 deletions(-) create mode 100644 tests/checkasm/sw_gbrp.c diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index 52cf9f2c2e..e277a61449 
100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -38,7 +38,49 @@ pw_32: times 8 dw 32 pd_255:times 8 dd 255 pw_512:times 8 dw 512 pw_1024: times 8 dw 1024 - +pd_65535_invf: times 8 dd 0x37800080 ;1.0/65535.0 +pd_yuv2gbrp16_start: times 8 dd -0x4000 +pd_yuv2gbrp_y_start: times 8 dd (1 << 9) +pd_yuv2gbrp_uv_start: times 8 dd ((1 << 9) - (128 << 19)) +pd_yuv2gbrp_a_start: times 8 dd (1 << 18) +pd_yuv2gbrp16_offset: times 8 dd 0x1 ;(1 << 16) +pd_yuv2gbrp16_round13: times 8 dd 0x02000 ;(1 << 13) +pd_yuv2gbrp16_a_offset:times 8 dd 0x20002000 +pd_yuv2gbrp16_upper30: times 8 dd 0x3FFF ;(1<<30) - 1 +pd_yuv2gbrp16_upper27: times 8 dd 0x07FF ;(1<<27) - 1 +pd_yuv2gbrp16_upperC: times 8 dd 0xC000 +pb_pack_shuffle8: db 0, 4, 8, 12, \ +
[FFmpeg-devel] [PATCH v3 2/2] swscale/input: clip rgbf32 values before lrintf
From: Mark Reid if the float pixel * 65535.0f > 2147483647.0f lrintf may overflow and return negative values, depending on implementation. nan and +/-inf values may also be implementation defined. Clip the value first so lrintf always works. values < 0.0f, -inf, nan = 0.0f values > 65535.0f, +inf = 65535.0f old timings 195960 decicycles in planar_rgbf32le_to_uv, 1 runs, 0 skips 186120 decicycles in planar_rgbf32le_to_uv, 2 runs, 0 skips 188645 decicycles in planar_rgbf32le_to_uv, 4 runs, 0 skips 183625 decicycles in planar_rgbf32le_to_uv, 8 runs, 0 skips 181157 decicycles in planar_rgbf32le_to_uv, 16 runs, 0 skips 177533 decicycles in planar_rgbf32le_to_uv, 32 runs, 0 skips 175689 decicycles in planar_rgbf32le_to_uv, 64 runs, 0 skips 232960 decicycles in planar_rgbf32be_to_uv, 1 runs, 0 skips 221380 decicycles in planar_rgbf32be_to_uv, 2 runs, 0 skips 216640 decicycles in planar_rgbf32be_to_uv, 4 runs, 0 skips 213505 decicycles in planar_rgbf32be_to_uv, 8 runs, 0 skips 211558 decicycles in planar_rgbf32be_to_uv, 16 runs, 0 skips 210596 decicycles in planar_rgbf32be_to_uv, 32 runs, 0 skips 210202 decicycles in planar_rgbf32be_to_uv, 64 runs, 0 skips 161680 decicycles in planar_rgbf32le_to_y, 1 runs, 0 skips 153540 decicycles in planar_rgbf32le_to_y, 2 runs, 0 skips 148255 decicycles in planar_rgbf32le_to_y, 4 runs, 0 skips 140600 decicycles in planar_rgbf32le_to_y, 8 runs, 0 skips 132935 decicycles in planar_rgbf32le_to_y, 16 runs, 0 skips 128531 decicycles in planar_rgbf32le_to_y, 32 runs, 0 skips 140933 decicycles in planar_rgbf32le_to_y, 64 runs, 0 skips 190980 decicycles in planar_rgbf32be_to_y, 1 runs, 0 skips 176080 decicycles in planar_rgbf32be_to_y, 2 runs, 0 skips 167980 decicycles in planar_rgbf32be_to_y, 4 runs, 0 skips 164685 decicycles in planar_rgbf32be_to_y, 8 runs, 0 skips 162751 decicycles in planar_rgbf32be_to_y, 16 runs, 0 skips 162404 decicycles in planar_rgbf32be_to_y, 32 runs, 0 skips 167849 decicycles in planar_rgbf32be_to_y, 64 runs, 0 skips 
new timings 183320 decicycles in planar_rgbf32le_to_uv, 1 runs, 0 skips 175700 decicycles in planar_rgbf32le_to_uv, 2 runs, 0 skips 179570 decicycles in planar_rgbf32le_to_uv, 4 runs, 0 skips 172932 decicycles in planar_rgbf32le_to_uv, 8 runs, 0 skips 168707 decicycles in planar_rgbf32le_to_uv, 16 runs, 0 skips 165224 decicycles in planar_rgbf32le_to_uv, 32 runs, 0 skips 163423 decicycles in planar_rgbf32le_to_uv, 64 runs, 0 skips 184940 decicycles in planar_rgbf32be_to_uv, 1 runs, 0 skips 185150 decicycles in planar_rgbf32be_to_uv, 2 runs, 0 skips 185790 decicycles in planar_rgbf32be_to_uv, 4 runs, 0 skips 185472 decicycles in planar_rgbf32be_to_uv, 8 runs, 0 skips 185277 decicycles in planar_rgbf32be_to_uv, 16 runs, 0 skips 185813 decicycles in planar_rgbf32be_to_uv, 32 runs, 0 skips 185332 decicycles in planar_rgbf32be_to_uv, 64 runs, 0 skips 145400 decicycles in planar_rgbf32le_to_y, 1 runs, 0 skips 145100 decicycles in planar_rgbf32le_to_y, 2 runs, 0 skips 143490 decicycles in planar_rgbf32le_to_y, 4 runs, 0 skips 136687 decicycles in planar_rgbf32le_to_y, 8 runs, 0 skips 131271 decicycles in planar_rgbf32le_to_y, 16 runs, 0 skips 128698 decicycles in planar_rgbf32le_to_y, 32 runs, 0 skips 127170 decicycles in planar_rgbf32le_to_y, 64 runs, 0 skips 156020 decicycles in planar_rgbf32be_to_y, 1 runs, 0 skips 146990 decicycles in planar_rgbf32be_to_y, 2 runs, 0 skips 142020 decicycles in planar_rgbf32be_to_y, 4 runs, 0 skips 141052 decicycles in planar_rgbf32be_to_y, 8 runs, 0 skips 138973 decicycles in planar_rgbf32be_to_y, 16 runs, 0 skips 138027 decicycles in planar_rgbf32be_to_y, 32 runs, 0 skips 143939 decicycles in planar_rgbf32be_to_y, 64 runs, 0 skips --- libswscale/input.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 90efdd2ffc..1351ea5bd4 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -973,7 +973,7 @@ static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const 
uint8_t *_s uint16_t *dst= (uint16_t *)_dst; for (i = 0; i < width; i++) { -dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src[3] + i))); +dst[i] = lrintf(av_clipf(65535.0f * rdpx(src[3] + i), 0.0f, 65535.0f)); } } @@ -987,9 +987,9 @@ static av_always_inline void planar_rgbf
[FFmpeg-devel] [PATCH v3 1/2] libavutil/common: clip nan value to amin
From: Mark Reid Changes av_clipf to return amin if a is nan. Previously, if a was nan, av_clipf_c returned nan and av_clipf_sse returned amax. Now both should behave the same. This works because nan > amin is false, so max(nan, amin) will be amin. --- libavutil/common.h | 12 ++-- libavutil/x86/intmath.h | 8 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libavutil/common.h b/libavutil/common.h index 3cc1f07566..9338bda7d5 100644 --- a/libavutil/common.h +++ b/libavutil/common.h @@ -379,6 +379,8 @@ static av_always_inline int64_t av_sat_sub64_c(int64_t a, int64_t b) { /** * Clip a float value into the amin-amax range. + * If a is nan or -inf amin will be returned. + * If a is +inf amax will be returned. * @param a value to clip * @param amin minimum value of the clip range * @param amax maximum value of the clip range @@ -389,13 +391,13 @@ static av_always_inline av_const float av_clipf_c(float a, float amin, float ama #if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2 if (amin > amax) abort(); #endif -if (a < amin) return amin; -else if (a > amax) return amax; -else return a; +return FFMIN(FFMAX(a, amin), amax); } /** * Clip a double value into the amin-amax range. + * If a is nan or -inf amin will be returned. + * If a is +inf amax will be returned. * @param a value to clip * @param amin minimum value of the clip range * @param amax maximum value of the clip range @@ -406,9 +408,7 @@ static av_always_inline av_const double av_clipd_c(double a, double amin, double #if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2 if (amin > amax) abort(); #endif -if (a < amin) return amin; -else if (a > amax) return amax; -else return a; +return FFMIN(FFMAX(a, amin), amax); } /** Compute ceil(log2(x)). 
diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h index 40743fd13e..1520c25ec9 100644 --- a/libavutil/x86/intmath.h +++ b/libavutil/x86/intmath.h @@ -110,8 +110,8 @@ static av_always_inline av_const double av_clipd_sse2(double a, double amin, dou #if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2 if (amin > amax) abort(); #endif -__asm__ ("minsd %2, %0 \n\t" - "maxsd %1, %0 \n\t" +__asm__ ("maxsd %1, %0 \n\t" + "minsd %2, %0 \n\t" : "+&x"(a) : "xm"(amin), "xm"(amax)); return a; } @@ -126,8 +126,8 @@ static av_always_inline av_const float av_clipf_sse(float a, float amin, float a #if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2 if (amin > amax) abort(); #endif -__asm__ ("minss %2, %0 \n\t" - "maxss %1, %0 \n\t" +__asm__ ("maxss %1, %0 \n\t" + "minss %2, %0 \n\t" : "+&x"(a) : "xm"(amin), "xm"(amax)); return a; } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 2/2] swscale/input: clamp rgbf32 values before lrintf
From: Mark Reid if the float pixel * 65535.0f > 2147483647.0f lrintf may overflow and return negative values, depending on implementation. nan and +/-inf values may also be implementation defined. Clamp the value first so lrintf always works. values <=0.0f, -inf, nan = 0.0f values >=1.0f, +inf = 1.0f the clamping adds some performance overhead, but using an inline function seems to help the compiler optimize on the compilers I tested. old timings 213920 UNITS in planar_rgbf32le_to_uv, 1 runs, 0 skips 218830 UNITS in planar_rgbf32le_to_uv, 2 runs, 0 skips 223285 UNITS in planar_rgbf32le_to_uv, 4 runs, 0 skips 215405 UNITS in planar_rgbf32le_to_uv, 8 runs, 0 skips 208920 UNITS in planar_rgbf32le_to_uv, 16 runs, 0 skips 205115 UNITS in planar_rgbf32le_to_uv, 32 runs, 0 skips 212220 UNITS in planar_rgbf32le_to_uv, 64 runs, 0 skips 216440 UNITS in planar_rgbf32be_to_uv, 1 runs, 0 skips 222450 UNITS in planar_rgbf32be_to_uv, 2 runs, 0 skips 228780 UNITS in planar_rgbf32be_to_uv, 4 runs, 0 skips 226900 UNITS in planar_rgbf32be_to_uv, 8 runs, 0 skips 223168 UNITS in planar_rgbf32be_to_uv, 16 runs, 0 skips 249340 UNITS in planar_rgbf32be_to_uv, 32 runs, 0 skips 233746 UNITS in planar_rgbf32be_to_uv, 64 runs, 0 skips 173360 UNITS in planar_rgbf32le_to_y, 1 runs, 0 skips 179970 UNITS in planar_rgbf32le_to_y, 2 runs, 0 skips 182960 UNITS in planar_rgbf32le_to_y, 4 runs, 0 skips 177040 UNITS in planar_rgbf32le_to_y, 8 runs, 0 skips 170351 UNITS in planar_rgbf32le_to_y, 16 runs, 0 skips 167136 UNITS in planar_rgbf32le_to_y, 32 runs, 0 skips 165821 UNITS in planar_rgbf32le_to_y, 64 runs, 0 skips 181040 UNITS in planar_rgbf32be_to_y, 1 runs, 0 skips 182920 UNITS in planar_rgbf32be_to_y, 2 runs, 0 skips 180935 UNITS in planar_rgbf32be_to_y, 4 runs, 0 skips 180897 UNITS in planar_rgbf32be_to_y, 8 runs, 0 skips 179640 UNITS in planar_rgbf32be_to_y, 16 runs, 0 skips 178912 UNITS in planar_rgbf32be_to_y, 32 runs, 0 skips 177983 UNITS in planar_rgbf32be_to_y, 64 runs, 0 skips new timings 
228860 UNITS in planar_rgbf32le_to_uv, 1 runs, 0 skips 232400 UNITS in planar_rgbf32le_to_uv, 2 runs, 0 skips 237270 UNITS in planar_rgbf32le_to_uv, 4 runs, 0 skips 229992 UNITS in planar_rgbf32le_to_uv, 8 runs, 0 skips 70 UNITS in planar_rgbf32le_to_uv, 16 runs, 0 skips 218896 UNITS in planar_rgbf32le_to_uv, 32 runs, 0 skips 216938 UNITS in planar_rgbf32le_to_uv, 64 runs, 0 skips 232340 UNITS in planar_rgbf32be_to_uv, 1 runs, 0 skips 231830 UNITS in planar_rgbf32be_to_uv, 2 runs, 0 skips 242235 UNITS in planar_rgbf32be_to_uv, 4 runs, 0 skips 235210 UNITS in planar_rgbf32be_to_uv, 8 runs, 0 skips 229040 UNITS in planar_rgbf32be_to_uv, 16 runs, 0 skips 224996 UNITS in planar_rgbf32be_to_uv, 32 runs, 0 skips 223581 UNITS in planar_rgbf32be_to_uv, 64 runs, 0 skips 179220 UNITS in planar_rgbf32le_to_y, 1 runs, 0 skips 174790 UNITS in planar_rgbf32le_to_y, 2 runs, 0 skips 182630 UNITS in planar_rgbf32le_to_y, 4 runs, 0 skips 183002 UNITS in planar_rgbf32le_to_y, 8 runs, 0 skips 181005 UNITS in planar_rgbf32le_to_y, 16 runs, 0 skips 179390 UNITS in planar_rgbf32le_to_y, 32 runs, 0 skips 192476 UNITS in planar_rgbf32le_to_y, 64 runs, 0 skips 195620 UNITS in planar_rgbf32be_to_y, 1 runs, 0 skips 195860 UNITS in planar_rgbf32be_to_y, 2 runs, 0 skips 198700 UNITS in planar_rgbf32be_to_y, 4 runs, 0 skips 197252 UNITS in planar_rgbf32be_to_y, 8 runs, 0 skips 195702 UNITS in planar_rgbf32be_to_y, 16 runs, 0 skips 194853 UNITS in planar_rgbf32be_to_y, 32 runs, 0 skips 194459 UNITS in planar_rgbf32be_to_y, 64 runs, 0 skips --- libswscale/input.c | 21 + 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 90efdd2ffc..2a13846abe 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -966,6 +966,11 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, #define rdpx(src) (is_be ? 
av_int2float(AV_RB32(src)): av_int2float(AV_RL32(src))) +static av_always_inline float clampf(float x, float min, float max) +{ +return FFMIN(FFMAX(x, min), max); +} + static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) { int i; @@ -973,7 +978,7 @@ static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_s
[FFmpeg-devel] [PATCH v2 1/2] swscale/input: unify grayf32 funcs with rgbf32 funcs
From: Mark Reid This is meant to be a cosmetic change. old timings: 42780 UNITS in grayf32le, 1 runs, 0 skips 56720 UNITS in grayf32le, 2 runs, 0 skips 67265 UNITS in grayf32le, 4 runs, 0 skips 58082 UNITS in grayf32le, 8 runs, 0 skips 63512 UNITS in grayf32le, 16 runs, 0 skips 52720 UNITS in grayf32le, 32 runs, 0 skips 46491 UNITS in grayf32le, 64 runs, 0 skips 68500 UNITS in grayf32be, 1 runs, 0 skips 66930 UNITS in grayf32be, 2 runs, 0 skips 62305 UNITS in grayf32be, 4 runs, 0 skips 55510 UNITS in grayf32be, 8 runs, 0 skips 50216 UNITS in grayf32be, 16 runs, 0 skips 44480 UNITS in grayf32be, 32 runs, 0 skips 42394 UNITS in grayf32be, 64 runs, 0 skips new timings: 46660 UNITS in grayf32le, 1 runs, 0 skips 51830 UNITS in grayf32le, 2 runs, 0 skips 53390 UNITS in grayf32le, 4 runs, 0 skips 50910 UNITS in grayf32le, 8 runs, 0 skips 44968 UNITS in grayf32le, 16 runs, 0 skips 40349 UNITS in grayf32le, 32 runs, 0 skips 38330 UNITS in grayf32le, 64 runs, 0 skips 39980 UNITS in grayf32be, 1 runs, 0 skips 49630 UNITS in grayf32be, 2 runs, 0 skips 53540 UNITS in grayf32be, 4 runs, 0 skips 59767 UNITS in grayf32be, 8 runs, 0 skips 51206 UNITS in grayf32be, 16 runs, 0 skips 44743 UNITS in grayf32be, 32 runs, 0 skips 41468 UNITS in grayf32be, 64 runs, 0 skips --- libswscale/input.c | 36 +++- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 336f957c8c..90efdd2ffc 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1013,31 +1013,19 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s } } -#undef rdpx - static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, -const uint8_t *unused2, int width, uint32_t *unused) +const uint8_t *unused2, int width, int is_be, uint32_t *unused) { int i; const float *src = (const float *)_src; uint16_t *dst= (uint16_t *)_dst; for (i = 0; i < width; ++i){ -dst[i] = av_clip_uint16(lrintf(65535.0f * src[i])); 
+dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i))); } } -static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, - const uint8_t *unused2, int width, uint32_t *unused) -{ -int i; -const uint32_t *src = (const uint32_t *)_src; -uint16_t *dst= (uint16_t *)_dst; - -for (i = 0; i < width; ++i){ -dst[i] = av_clip_uint16(lrintf(65535.0f * av_int2float(av_bswap32(src[i]; -} -} +#undef rdpx #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \ static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ @@ -1092,6 +1080,12 @@ static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4 int w, int32_t *rgb2yuv) \ { \ planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv); \ +} \ +static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src, \ + const uint8_t *unused1, const uint8_t *unused2, \ + int width, uint32_t *unused) \ +{ \ +grayf32ToY16_c(dst, src, unused1, unused2, width, endian, unused); \ } rgbf32_planar_funcs_endian(le, 0) @@ -1699,18 +1693,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->lumToYV12 = p010BEToY_c; break; case AV_PIX_FMT_GRAYF32LE: -#if HAVE_BIGENDIAN -c->lumToYV12 = grayf32ToY16_bswap_c; -#else -c->lumToYV12 = grayf32ToY16_c; -#endif +c->lumToYV12 = grayf32leToY16_c; break; case AV_PIX_FMT_GRAYF32BE: -#if HAVE_BIGENDIAN -c->lumToYV12 = grayf32ToY16_c; -#else -c->lumToYV12 = grayf32ToY16_bswap_c; -#endif +c->lumToYV12 = grayf32beToY16_c; break; case AV_PIX_FMT_Y210LE: c->lumToYV12 = y210le_Y_c; -- 2.31.1.window
[FFmpeg-devel] [PATCH v2 1/2] swscale/input: unify grayf32 funcs with rgbf32 funcs
From: Mark Reid --- libswscale/input.c | 36 +++- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 336f957c8c..90efdd2ffc 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1013,31 +1013,19 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s } } -#undef rdpx - static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, -const uint8_t *unused2, int width, uint32_t *unused) +const uint8_t *unused2, int width, int is_be, uint32_t *unused) { int i; const float *src = (const float *)_src; uint16_t *dst= (uint16_t *)_dst; for (i = 0; i < width; ++i){ -dst[i] = av_clip_uint16(lrintf(65535.0f * src[i])); +dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i))); } } -static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, - const uint8_t *unused2, int width, uint32_t *unused) -{ -int i; -const uint32_t *src = (const uint32_t *)_src; -uint16_t *dst= (uint16_t *)_dst; - -for (i = 0; i < width; ++i){ -dst[i] = av_clip_uint16(lrintf(65535.0f * av_int2float(av_bswap32(src[i]; -} -} +#undef rdpx #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \ static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ @@ -1092,6 +1080,12 @@ static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4 int w, int32_t *rgb2yuv) \ { \ planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv); \ +} \ +static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src, \ + const uint8_t *unused1, const uint8_t *unused2, \ + int width, uint32_t *unused) \ +{ \ +grayf32ToY16_c(dst, src, unused1, unused2, width, endian, unused); \ } rgbf32_planar_funcs_endian(le, 0) @@ -1699,18 +1693,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->lumToYV12 = p010BEToY_c; break; case AV_PIX_FMT_GRAYF32LE: -#if HAVE_BIGENDIAN -c->lumToYV12 = 
grayf32ToY16_bswap_c; -#else -c->lumToYV12 = grayf32ToY16_c; -#endif +c->lumToYV12 = grayf32leToY16_c; break; case AV_PIX_FMT_GRAYF32BE: -#if HAVE_BIGENDIAN -c->lumToYV12 = grayf32ToY16_c; -#else -c->lumToYV12 = grayf32ToY16_bswap_c; -#endif +c->lumToYV12 = grayf32beToY16_c; break; case AV_PIX_FMT_Y210LE: c->lumToYV12 = y210le_Y_c; -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 2/2] swscale/input: clamp rgbf32 values between 0, 1 before scaling
From: Mark Reid if the float pixel * 65535.0f > 2147483647.0f lrintf may overflow and return negative values, depending on implementation. nan and +/-inf values may also be implementation defined. Clamp the values between 0,1 before scaling, so lrintf always works. values <=0.0f, -inf, nan = 0.0f values >=1.0f, +inf = 1.0f --- libswscale/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libswscale/input.c b/libswscale/input.c index 90efdd2ffc..fc8242a758 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -964,7 +964,7 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, } #undef rdpx -#define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): av_int2float(AV_RL32(src))) +#define rdpx(src) (FFMIN(FFMAX(is_be ? av_int2float(AV_RB32(src)): av_int2float(AV_RL32(src)), 0.0f), 1.0f)) static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) { -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] swscale/input: clamp rgbf32 values between 0, 1 before scaling
From: Mark Reid if the float pixel * 65535.0f > 2147483647.0f lrintf may overflow and return negative values, depending on implementation. nan and +/-inf values may also be implementation defined. Clamp the values between 0,1 before scaling, so lrintf always works. values <=0.0f, -inf, nan = 0.0f values >=1.0f, +inf = 1.0f --- libswscale/input.c | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 336f957c8c..ea50c9de5c 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -964,7 +964,7 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, } #undef rdpx -#define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): av_int2float(AV_RL32(src))) +#define rdpx(src) (FFMIN(FFMAX(is_be ? av_int2float(AV_RB32(src)): av_int2float(AV_RL32(src)), 0.0f), 1.0f)) static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) { @@ -1013,17 +1013,16 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s } } -#undef rdpx - static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; const float *src = (const float *)_src; uint16_t *dst= (uint16_t *)_dst; +int is_be = 0; for (i = 0; i < width; ++i){ -dst[i] = av_clip_uint16(lrintf(65535.0f * src[i])); +dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i))); } } @@ -1033,12 +1032,15 @@ static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t * int i; const uint32_t *src = (const uint32_t *)_src; uint16_t *dst= (uint16_t *)_dst; +int is_be = 1; for (i = 0; i < width; ++i){ -dst[i] = av_clip_uint16(lrintf(65535.0f * av_int2float(av_bswap32(src[i])))); +dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src+ i))); } } +#undef rdpx + #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \ static void 
planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ int w, int32_t *rgb2yuv) \ -- 2.29.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] swscale/x86/output.asm: add x86-optimized planar gbr yuv2anyX functions
From: Mark Reid changes since v1: * remove vex instruction on sse4 path * some load/pack macros use fewer instructions * fixed some typos yuv2gbrp_full_X_4_512_c: 12757.6 yuv2gbrp_full_X_4_512_sse2: 8946.6 yuv2gbrp_full_X_4_512_sse4: 5138.6 yuv2gbrp_full_X_4_512_avx2: 3889.6 yuv2gbrap_full_X_4_512_c: 15368.6 yuv2gbrap_full_X_4_512_sse2: 11916.1 yuv2gbrap_full_X_4_512_sse4: 6294.6 yuv2gbrap_full_X_4_512_avx2: 3477.1 yuv2gbrp9be_full_X_4_512_c: 14381.6 yuv2gbrp9be_full_X_4_512_sse2: 9139.1 yuv2gbrp9be_full_X_4_512_sse4: 5150.1 yuv2gbrp9be_full_X_4_512_avx2: 2834.6 yuv2gbrp9le_full_X_4_512_c: 12990.1 yuv2gbrp9le_full_X_4_512_sse2: 9118.1 yuv2gbrp9le_full_X_4_512_sse4: 5132.1 yuv2gbrp9le_full_X_4_512_avx2: 2833.1 yuv2gbrp10be_full_X_4_512_c: 14401.6 yuv2gbrp10be_full_X_4_512_sse2: 9133.1 yuv2gbrp10be_full_X_4_512_sse4: 5126.1 yuv2gbrp10be_full_X_4_512_avx2: 2837.6 yuv2gbrp10le_full_X_4_512_c: 12718.1 yuv2gbrp10le_full_X_4_512_sse2: 9106.1 yuv2gbrp10le_full_X_4_512_sse4: 5120.1 yuv2gbrp10le_full_X_4_512_avx2: 2826.1 yuv2gbrap10be_full_X_4_512_c: 18535.6 yuv2gbrap10be_full_X_4_512_sse2: 33617.6 yuv2gbrap10be_full_X_4_512_sse4: 6264.1 yuv2gbrap10be_full_X_4_512_avx2: 3422.1 yuv2gbrap10le_full_X_4_512_c: 16724.1 yuv2gbrap10le_full_X_4_512_sse2: 11787.1 yuv2gbrap10le_full_X_4_512_sse4: 6282.1 yuv2gbrap10le_full_X_4_512_avx2: 3441.6 yuv2gbrp12be_full_X_4_512_c: 13723.6 yuv2gbrp12be_full_X_4_512_sse2: 9128.1 yuv2gbrp12be_full_X_4_512_sse4: 7997.6 yuv2gbrp12be_full_X_4_512_avx2: 2844.1 yuv2gbrp12le_full_X_4_512_c: 12257.1 yuv2gbrp12le_full_X_4_512_sse2: 9107.6 yuv2gbrp12le_full_X_4_512_sse4: 5142.6 yuv2gbrp12le_full_X_4_512_avx2: 2837.6 yuv2gbrap12be_full_X_4_512_c: 18511.1 yuv2gbrap12be_full_X_4_512_sse2: 12156.6 yuv2gbrap12be_full_X_4_512_sse4: 6251.1 yuv2gbrap12be_full_X_4_512_avx2: 3444.6 yuv2gbrap12le_full_X_4_512_c: 16687.1 yuv2gbrap12le_full_X_4_512_sse2: 11785.1 yuv2gbrap12le_full_X_4_512_sse4: 6243.6 yuv2gbrap12le_full_X_4_512_avx2: 3446.1 yuv2gbrp14be_full_X_4_512_c: 
13690.6 yuv2gbrp14be_full_X_4_512_sse2: 9120.6 yuv2gbrp14be_full_X_4_512_sse4: 5138.1 yuv2gbrp14be_full_X_4_512_avx2: 2843.1 yuv2gbrp14le_full_X_4_512_c: 14995.6 yuv2gbrp14le_full_X_4_512_sse2: 9119.1 yuv2gbrp14le_full_X_4_512_sse4: 5126.1 yuv2gbrp14le_full_X_4_512_avx2: 2843.1 yuv2gbrp16be_full_X_4_512_c: 12367.1 yuv2gbrp16be_full_X_4_512_sse2: 8233.6 yuv2gbrp16be_full_X_4_512_sse4: 4820.1 yuv2gbrp16be_full_X_4_512_avx2: 2666.6 yuv2gbrp16le_full_X_4_512_c: 10904.1 yuv2gbrp16le_full_X_4_512_sse2: 8214.1 yuv2gbrp16le_full_X_4_512_sse4: 4824.1 yuv2gbrp16le_full_X_4_512_avx2: 2629.1 yuv2gbrap16be_full_X_4_512_c: 26569.6 yuv2gbrap16be_full_X_4_512_sse2: 10884.1 yuv2gbrap16be_full_X_4_512_sse4: 5488.1 yuv2gbrap16be_full_X_4_512_avx2: 3272.1 yuv2gbrap16le_full_X_4_512_c: 14010.1 yuv2gbrap16le_full_X_4_512_sse2: 10562.1 yuv2gbrap16le_full_X_4_512_sse4: 5463.6 yuv2gbrap16le_full_X_4_512_avx2: 3255.1 yuv2gbrpf32be_full_X_4_512_c: 14524.1 yuv2gbrpf32be_full_X_4_512_sse2: 8552.6 yuv2gbrpf32be_full_X_4_512_sse4: 4636.1 yuv2gbrpf32be_full_X_4_512_avx2: 2474.6 yuv2gbrpf32le_full_X_4_512_c: 13060.6 yuv2gbrpf32le_full_X_4_512_sse2: 9682.6 yuv2gbrpf32le_full_X_4_512_sse4: 4298.1 yuv2gbrpf32le_full_X_4_512_avx2: 2453.1 yuv2gbrapf32be_full_X_4_512_c: 18629.6 yuv2gbrapf32be_full_X_4_512_sse2: 11363.1 yuv2gbrapf32be_full_X_4_512_sse4: 15201.6 yuv2gbrapf32be_full_X_4_512_avx2: 3727.1 yuv2gbrapf32le_full_X_4_512_c: 16677.6 yuv2gbrapf32le_full_X_4_512_sse2: 10221.6 yuv2gbrapf32le_full_X_4_512_sse4: 5693.6 yuv2gbrapf32le_full_X_4_512_avx2: 3656.6 --- libswscale/x86/output.asm | 435 +- libswscale/x86/swscale.c | 98 + tests/checkasm/Makefile | 2 +- tests/checkasm/checkasm.c | 1 + tests/checkasm/checkasm.h | 1 + tests/checkasm/sw_gbrp.c | 198 + tests/fate/checkasm.mak | 1 + 7 files changed, 734 insertions(+), 2 deletions(-) create mode 100644 tests/checkasm/sw_gbrp.c diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index 52cf9f2c2e..ce0a2650b4 100644 --- 
a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -38,7 +38,49 @@ pw_32: times 8 dw 32 pd_255:times 8 dd 255 pw_512:times 8 dw 512 pw_1024: times 8 dw 1024 - +pd_65535_invf: times 8 dd 0x37800080 ;1.0/65535.0 +pd_yuv2gbrp16_start: times 8 dd -0x4000 +pd_yuv2gbrp_y_start: times 8 dd (1 << 9) +pd_yuv2gbrp_uv_start: times 8 dd ((1 << 9) - (128 << 19)) +pd_yuv2gbrp_a_start: times 8 dd (1 << 18) +pd_yuv2gbrp16_offset: times 8 dd 0x1 ;(1 << 16) +pd_yuv2gbrp16_round13: times 8 dd 0x02000 ;(1 << 13) +pd_yuv2gbrp16_a_offset:times 8 dd 0x20002000 +pd_yuv2gbrp16_upper30: times 8 dd 0x3FFF ;(1<<30) - 1 +pd_yuv2gbrp16_upper27: times 8 dd 0x07FF ;(1<<27) - 1 +pd_yuv2gbrp16_upperC: times 8 dd 0xC000 +pb_pack_shuffle8: db 0, 4, 8, 12, \ + -1, -1, -1, -1, \ +
[FFmpeg-devel] [PATCH 1/1] swscale/input: fix planar_rgb16_to_a for gbrap10be and gbrap12be formats
From: Mark Reid --- libswscale/input.c | 2 +- tests/ref/fate/filter-pixfmts-scale | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 477dc3d6b2..336f957c8c 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -911,7 +911,7 @@ static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[ } #define rdpx(src) \ -is_be ? AV_RB16(src) : AV_RL16(src) +(is_be ? AV_RB16(src) : AV_RL16(src)) static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_src[4], int width, int bpc, int is_be, int32_t *rgb2yuv) { diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale index 07c4ff536d..1623e5c939 100644 --- a/tests/ref/fate/filter-pixfmts-scale +++ b/tests/ref/fate/filter-pixfmts-scale @@ -19,9 +19,9 @@ bgrad8316272bc3a360ef9dff3ecc84520a3 bgra64be4e6a1b9f9c18b881c27d76611d45f737 bgra64leefeee0abcc658ebcff049d5e74d74943 gbrap 4a100f750ac846b34bfeef0d6893c3de -gbrap10be dc6aea3559ea4fcdda1ccc4f23d2f2fb +gbrap10be 50735fbc471a5ac5a6645c85881f3670 gbrap10le 6e1cba57029fdf0f9d46b5e5cd55112b -gbrap12be dbe3a662c016563529032cd4dfb80262 +gbrap12be 58170165829484b3db4a3b9165198987 gbrap12le 24f5ecb32435b73353517e017c165e31 gbrap16be 31968e6872a46e8174fb57f8920ed10d gbrap16le 8c6758f33671b673b6d30969fc05a23d -- 2.29.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] swscale/x86/output.asm: add x86-optimized planar gbr yuv2anyX functions
From: Mark Reid yuv2gbrp_full_X_4_512_c: 12096.6 yuv2gbrp_full_X_4_512_sse2: 10782.6 yuv2gbrp_full_X_4_512_sse4: 5143.6 yuv2gbrp_full_X_4_512_avx2: 3000.1 yuv2gbrap_full_X_4_512_c: 15463.1 yuv2gbrap_full_X_4_512_sse2: 14296.6 yuv2gbrap_full_X_4_512_sse4: 6319.1 yuv2gbrap_full_X_4_512_avx2: 3554.1 yuv2gbrp9be_full_X_4_512_c: 14281.6 yuv2gbrp9be_full_X_4_512_sse2: 11206.1 yuv2gbrp9be_full_X_4_512_sse4: 5033.6 yuv2gbrp9be_full_X_4_512_avx2: 3012.6 yuv2gbrp9le_full_X_4_512_c: 12688.6 yuv2gbrp9le_full_X_4_512_sse2: 10914.1 yuv2gbrp9le_full_X_4_512_sse4: 5144.6 yuv2gbrp9le_full_X_4_512_avx2: 3014.6 yuv2gbrp10be_full_X_4_512_c: 14257.6 yuv2gbrp10be_full_X_4_512_sse2: 11089.6 yuv2gbrp10be_full_X_4_512_sse4: 5039.1 yuv2gbrp10be_full_X_4_512_avx2: 3001.1 yuv2gbrp10le_full_X_4_512_c: 12098.6 yuv2gbrp10le_full_X_4_512_sse2: 10884.1 yuv2gbrp10le_full_X_4_512_sse4: 5138.1 yuv2gbrp10le_full_X_4_512_avx2: 2999.6 yuv2gbrap10be_full_X_4_512_c: 18549.6 yuv2gbrap10be_full_X_4_512_sse2: 14538.6 yuv2gbrap10be_full_X_4_512_sse4: 6292.6 yuv2gbrap10be_full_X_4_512_avx2: 3583.6 yuv2gbrap10le_full_X_4_512_c: 16631.1 yuv2gbrap10le_full_X_4_512_sse2: 14190.6 yuv2gbrap10le_full_X_4_512_sse4: 6348.1 yuv2gbrap10le_full_X_4_512_avx2: 3554.6 yuv2gbrp12be_full_X_4_512_c: 13555.1 yuv2gbrp12be_full_X_4_512_sse2: 10952.1 yuv2gbrp12be_full_X_4_512_sse4: 5137.6 yuv2gbrp12be_full_X_4_512_avx2: 3009.6 yuv2gbrp12le_full_X_4_512_c: 12082.6 yuv2gbrp12le_full_X_4_512_sse2: 10891.1 yuv2gbrp12le_full_X_4_512_sse4: 5184.1 yuv2gbrp12le_full_X_4_512_avx2: 3011.1 yuv2gbrap12be_full_X_4_512_c: 18689.6 yuv2gbrap12be_full_X_4_512_sse2: 14522.6 yuv2gbrap12be_full_X_4_512_sse4: 6237.6 yuv2gbrap12be_full_X_4_512_avx2: 3585.6 yuv2gbrap12le_full_X_4_512_c: 16760.6 yuv2gbrap12le_full_X_4_512_sse2: 14202.1 yuv2gbrap12le_full_X_4_512_sse4: 6252.1 yuv2gbrap12le_full_X_4_512_avx2: 3591.1 yuv2gbrp14be_full_X_4_512_c: 13555.6 yuv2gbrp14be_full_X_4_512_sse2: 10949.1 yuv2gbrp14be_full_X_4_512_sse4: 5185.1 
yuv2gbrp14be_full_X_4_512_avx2: 3012.1 yuv2gbrp14le_full_X_4_512_c: 12068.1 yuv2gbrp14le_full_X_4_512_sse2: 10883.6 yuv2gbrp14le_full_X_4_512_sse4: 5145.1 yuv2gbrp14le_full_X_4_512_avx2: 3007.1 yuv2gbrp16be_full_X_4_512_c: 12383.6 yuv2gbrp16be_full_X_4_512_sse2: 8230.6 yuv2gbrp16be_full_X_4_512_sse4: 4765.6 yuv2gbrp16be_full_X_4_512_avx2: 2742.6 yuv2gbrp16le_full_X_4_512_c: 10906.1 yuv2gbrp16le_full_X_4_512_sse2: 28732.1 yuv2gbrp16le_full_X_4_512_sse4: 4709.6 yuv2gbrp16le_full_X_4_512_avx2: 2753.1 yuv2gbrap16be_full_X_4_512_c: 15472.6 yuv2gbrap16be_full_X_4_512_sse2: 11021.6 yuv2gbrap16be_full_X_4_512_sse4: 5487.6 yuv2gbrap16be_full_X_4_512_avx2: 3143.6 yuv2gbrap16le_full_X_4_512_c: 13668.6 yuv2gbrap16le_full_X_4_512_sse2: 10562.1 yuv2gbrap16le_full_X_4_512_sse4: 5506.6 yuv2gbrap16le_full_X_4_512_avx2: 3149.6 yuv2gbrpf32be_full_X_4_512_c: 15471.1 yuv2gbrpf32be_full_X_4_512_sse2: 8524.6 yuv2gbrpf32be_full_X_4_512_sse4: 4559.1 yuv2gbrpf32be_full_X_4_512_avx2: 2388.1 yuv2gbrpf32le_full_X_4_512_c: 14247.6 yuv2gbrpf32le_full_X_4_512_sse2: 7600.6 yuv2gbrpf32le_full_X_4_512_sse4: 4385.6 yuv2gbrpf32le_full_X_4_512_avx2: 2258.6 yuv2gbrapf32be_full_X_4_512_c: 18412.1 yuv2gbrapf32be_full_X_4_512_sse2: 11353.6 yuv2gbrapf32be_full_X_4_512_sse4: 5807.1 yuv2gbrapf32be_full_X_4_512_avx2: 2928.1 yuv2gbrapf32le_full_X_4_512_c: 16485.1 yuv2gbrapf32le_full_X_4_512_sse2: 10202.1 yuv2gbrapf32le_full_X_4_512_sse4: 5571.6 yuv2gbrapf32le_full_X_4_512_avx2: 2847.6 --- libswscale/x86/output.asm | 440 +- libswscale/x86/swscale.c | 99 + tests/checkasm/Makefile | 2 +- tests/checkasm/checkasm.c | 1 + tests/checkasm/checkasm.h | 1 + tests/checkasm/sw_gbrp.c | 198 + tests/fate/checkasm.mak | 1 + 7 files changed, 740 insertions(+), 2 deletions(-) create mode 100644 tests/checkasm/sw_gbrp.c diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index 52cf9f2c2e..e80b6256b4 100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -38,7 +38,49 @@ pw_32: times 8 dw 32 
pd_255:times 8 dd 255 pw_512:times 8 dw 512 pw_1024: times 8 dw 1024 - +pd_65535_invf: times 8 dd 0x37800080 ;1.0/65535.0 +pd_yuv2gbrp16_start: times 8 dd -0x4000 +pd_yuv2gbrp_y_start: times 8 dd (1 << 9) +pd_yuv2gbrp_uv_start: times 8 dd ((1 << 9) - (128 << 19)) +pd_yuv2gbrp_a_start: times 8 dd (1 << 18) +pd_yuv2gbrp16_offset: times 8 dd 0x1 ;(1 << 16) +pd_yuv2gbrp16_round13: times 8 dd 0x02000 ;(1 << 13) +pd_yuv2gbrp16_a_offset:times 8 dd 0x20002000 +pd_yuv2gbrp16_upper30: times 8 dd 0x3FFF ;(1<<30) - 1 +pd_yuv2gbrp16_upper27: times 8 dd 0x07FF ;(1<<27) - 1 +pd_yuv2gbrp16_upperC: times 8 dd 0xC000 +pb_lo_pack_shuffle8:db 0, 4, 8, 12, \ + -1, -1, -1, -1, \ + -1, -1, -1, -1, \ + -1, -1, -1, -1 +pb_hi_pack_shuffle8:db -1, -1, -1, -1,
[FFmpeg-devel] [PATCH] avfilter/vf_lut3d: fix building with --disable-optimizations
From: Mark Reid --- libavfilter/x86/vf_lut3d_init.c | 4 1 file changed, 4 insertions(+) diff --git a/libavfilter/x86/vf_lut3d_init.c b/libavfilter/x86/vf_lut3d_init.c index 3b3dd18680..db1a152f51 100644 --- a/libavfilter/x86/vf_lut3d_init.c +++ b/libavfilter/x86/vf_lut3d_init.c @@ -48,9 +48,11 @@ static int interp_##name##_##format##_##opt(AVFilterContext *ctx, void *arg, int DEFINE_INTERP_FUNC(tetrahedral, pf32, avx) DEFINE_INTERP_FUNC(tetrahedral, p16, avx) #endif +#if HAVE_SSE2_EXTERNAL DEFINE_INTERP_FUNC(tetrahedral, pf32, sse2) DEFINE_INTERP_FUNC(tetrahedral, p16, sse2) #endif +#endif av_cold void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc) @@ -78,11 +80,13 @@ av_cold void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc) } #endif } else if (EXTERNAL_SSE2(cpu_flags) && s->interpolation == INTERPOLATE_TETRAHEDRAL && planar) { +#if HAVE_SSE2_EXTERNAL if (isfloat) { s->interp = interp_tetrahedral_pf32_sse2; } else if (depth == 16) { s->interp = interp_tetrahedral_p16_sse2; } +#endif } #endif } -- 2.31.1.windows.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation
From: Mark Reid I spotted an interesting pattern that I didn't see before that leads to the implementation being faster. The bit shifting table I was using before is no longer needed, and was able to remove quite a few lines. I also add use of FMA on the AVX2 version. f32 1920x1080 1 thread with prelut c impl 1434012700 UNITS in lut3d->interp,    1 runs,    0 skips 1434035335 UNITS in lut3d->interp,    2 runs,    0 skips 1423615347 UNITS in lut3d->interp,    4 runs,    0 skips 1426268863 UNITS in lut3d->interp,    8 runs,    0 skips sse2 905484420 UNITS in lut3d->interp,    1 runs,    0 skips 905659010 UNITS in lut3d->interp,    2 runs,    0 skips 915167140 UNITS in lut3d->interp,    4 runs,    0 skips 915834222 UNITS in lut3d->interp,    8 runs,    0 skips avx 574794860 UNITS in lut3d->interp,    1 runs,    0 skips 581035090 UNITS in lut3d->interp,    2 runs,    0 skips 584116720 UNITS in lut3d->interp,    4 runs,    0 skips 581460290 UNITS in lut3d->interp,    8 runs,    0 skips avx2 301698880 UNITS in lut3d->interp,    1 runs,    0 skips 301982880 UNITS in lut3d->interp,    2 runs,    0 skips 306962430 UNITS in lut3d->interp,    4 runs,    0 skips 305472025 UNITS in lut3d->interp,    8 runs,    0 skips gbrap16 1920x1080 1 thread with prelut c impl 1480894840 UNITS in lut3d->interp,    1 runs,    0 skips 1502922990 UNITS in lut3d->interp,    2 runs,    0 skips 1496114307 UNITS in lut3d->interp,    4 runs,    0 skips 1492554551 UNITS in lut3d->interp,    8 runs,    0 skips sse2 980777180 UNITS in lut3d->interp,    1 runs,    0 skips 986121520 UNITS in lut3d->interp,    2 runs,    0 skips 986489840 UNITS in lut3d->interp,    4 runs,    0 skips 998832248 UNITS in lut3d->interp,    8 runs,    0 skips avx 622212360 UNITS in lut3d->interp,    1 runs,    0 skips 622981160 UNITS in lut3d->interp,    2 runs,    0 skips 645396315 UNITS in lut3d->interp,    4 runs,    0 skips 641057075 UNITS in lut3d->interp,    8 runs,    0 skips avx2 321336400 UNITS in lut3d->interp,    1 
runs,    0 skips 321268920 UNITS in lut3d->interp,    2 runs,    0 skips 323459895 UNITS in lut3d->interp,    4 runs,    0 skips 324949967 UNITS in lut3d->interp,    8 runs,    0 skips --- libavfilter/lut3d.h | 83 libavfilter/vf_lut3d.c | 61 +-- libavfilter/x86/Makefile| 2 + libavfilter/x86/vf_lut3d.asm| 662 libavfilter/x86/vf_lut3d_init.c | 88 + 5 files changed, 840 insertions(+), 56 deletions(-) create mode 100644 libavfilter/lut3d.h create mode 100644 libavfilter/x86/vf_lut3d.asm create mode 100644 libavfilter/x86/vf_lut3d_init.c diff --git a/libavfilter/lut3d.h b/libavfilter/lut3d.h new file mode 100644 index 00..ded2a036a5 --- /dev/null +++ b/libavfilter/lut3d.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013 Clément Bœsch + * Copyright (c) 2018 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef AVFILTER_LUT3D_H +#define AVFILTER_LUT3D_H + +#include "libavutil/pixdesc.h" +#include "framesync.h" +#include "avfilter.h" + +enum interp_mode { +INTERPOLATE_NEAREST, +INTERPOLATE_TRILINEAR, +INTERPOLATE_TETRAHEDRAL, +INTERPOLATE_PYRAMID, +INTERPOLATE_PRISM, +NB_INTERP_MODE +}; + +struct rgbvec { +float r, g, b; +}; + +/* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT + * of 512x512 (64x64x64) */ +#define MAX_LEVEL 256 +#define PRELUT_SIZE 65536 + +typedef struct Lut3DPreLut { +int size; +float min[3]; +float max[3]; +float scale[3]; +float* lut[3]; +} Lut3DPreLut; + +typedef struct LUT3DContext { +const AVClass *class; +struct rgbvec *lut; +int lutsize; +int lutsize2; +struct rgbvec scale; +int interpolation; /// +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in t
[FFmpeg-devel] [PATCH] avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation
From: Mark Reid Only supports float and 16bit planer formats at the momoment. Mainly focused on AVX and AVX2 optimizations, but SSE2 does seem offer some speed gains. f32 1920x1080 1 thread with prelut c impl 1389936500 UNITS in lut3d->interp,    1 runs,    0 skips 1425800240 UNITS in lut3d->interp,    2 runs,    0 skips 1433312777 UNITS in lut3d->interp,    4 runs,    0 skips 1443346798 UNITS in lut3d->interp,    8 runs,    0 skips sse2 948662320 UNITS in lut3d->interp,    1 runs,    0 skips 1101247540 UNITS in lut3d->interp,    2 runs,    0 skips 1050645695 UNITS in lut3d->interp,    4 runs,    0 skips 1041102937 UNITS in lut3d->interp,    8 runs,    0 skips avx 633837000 UNITS in lut3d->interp,    1 runs,    0 skips 669452850 UNITS in lut3d->interp,    2 runs,    0 skips 650716580 UNITS in lut3d->interp,    4 runs,    0 skips 644698550 UNITS in lut3d->interp,    8 runs,    0 skips avx2 354940020 UNITS in lut3d->interp,    1 runs,    0 skips 362384340 UNITS in lut3d->interp,    2 runs,    0 skips 356799020 UNITS in lut3d->interp,    4 runs,    0 skips 357276815 UNITS in lut3d->interp,    8 runs,    0 skips gbrap16 1920x1080 1 thread with prelut c impl 1445071160 UNITS in lut3d->interp,    1 runs,    0 skips 1477959120 UNITS in lut3d->interp,    2 runs,    0 skips 1472102670 UNITS in lut3d->interp,    4 runs,    0 skips 1462579330 UNITS in lut3d->interp,    8 runs,    0 skips sse2 1035437580 UNITS in lut3d->interp,    1 runs,    0 skips 1050139710 UNITS in lut3d->interp,    2 runs,    0 skips 1070147205 UNITS in lut3d->interp,    4 runs,    0 skips 1064583037 UNITS in lut3d->interp,    8 runs,    0 skips avx 678089880 UNITS in lut3d->interp,    1 runs,    0 skips 679112485 UNITS in lut3d->interp,    2 runs,    0 skips 695527212 UNITS in lut3d->interp,    4 runs,    0 skips 691300053 UNITS in lut3d->interp,    8 runs,    0 skips avx2 372671340 UNITS in lut3d->interp,    1 runs,    0 skips 373449870 UNITS in lut3d->interp,    2 runs,    0 skips 383725625 UNITS in 
lut3d->interp,    4 runs,    0 skips 382860848 UNITS in lut3d->interp,    8 runs,    0 skips --- libavfilter/lut3d.h | 83 libavfilter/vf_lut3d.c | 61 +-- libavfilter/x86/Makefile| 2 + libavfilter/x86/vf_lut3d.asm| 757 libavfilter/x86/vf_lut3d_init.c | 88 5 files changed, 935 insertions(+), 56 deletions(-) create mode 100644 libavfilter/lut3d.h create mode 100644 libavfilter/x86/vf_lut3d.asm create mode 100644 libavfilter/x86/vf_lut3d_init.c diff --git a/libavfilter/lut3d.h b/libavfilter/lut3d.h new file mode 100644 index 00..ded2a036a5 --- /dev/null +++ b/libavfilter/lut3d.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013 Clément Bœsch + * Copyright (c) 2018 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef AVFILTER_LUT3D_H +#define AVFILTER_LUT3D_H + +#include "libavutil/pixdesc.h" +#include "framesync.h" +#include "avfilter.h" + +enum interp_mode { +INTERPOLATE_NEAREST, +INTERPOLATE_TRILINEAR, +INTERPOLATE_TETRAHEDRAL, +INTERPOLATE_PYRAMID, +INTERPOLATE_PRISM, +NB_INTERP_MODE +}; + +struct rgbvec { +float r, g, b; +}; + +/* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT + * of 512x512 (64x64x64) */ +#define MAX_LEVEL 256 +#define PRELUT_SIZE 65536 + +typedef struct Lut3DPreLut { +int size; +float min[3]; +float max[3]; +float scale[3]; +float* lut[3]; +} Lut3DPreLut; + +typedef struct LUT3DContext { +const AVClass *class; +struct rgbvec *lut; +int lutsize; +int lutsize2; +struct rgbvec scale; +int interpolation; /// +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
[FFmpeg-devel] [PATCH v2 2/2] libavdevice/avfoundation: add option to set audio sample rate and use native device formats
From: Mark Reid This also seems to prevent the audio format changing after format has been identified. This can happen in ffplay and might have something to do with sdl configuring the audio devices. --- libavdevice/avfoundation.m | 123 - 1 file changed, 94 insertions(+), 29 deletions(-) diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m index 5ac6ec4183..70226cfdc8 100644 --- a/libavdevice/avfoundation.m +++ b/libavdevice/avfoundation.m @@ -118,11 +118,8 @@ typedef struct int audio_channels; int audio_bits_per_sample; -int audio_float; -int audio_be; -int audio_signed_integer; -int audio_packed; int audio_non_interleaved; +int audio_sample_rate; int32_t *audio_buffer; int audio_buffer_size; @@ -632,12 +629,47 @@ static int add_video_device(AVFormatContext *s, AVCaptureDevice *video_device) return 0; } +static enum AVCodecID find_audio_codec_id(const AudioStreamBasicDescription *basic_desc) +{ +int audio_float = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat; +int audio_signed_integer = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger; +int audio_be = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian; +int audio_packed = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked; +int audio_bits_per_sample = basic_desc->mBitsPerChannel; + +if (basic_desc->mFormatID == kAudioFormatLinearPCM && +audio_float && +audio_bits_per_sample == 32 && +audio_packed) { +return audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE; +} else if (basic_desc->mFormatID == kAudioFormatLinearPCM && +audio_signed_integer && +audio_bits_per_sample == 16 && +audio_packed) { +return audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE; +} else if (basic_desc->mFormatID == kAudioFormatLinearPCM && +audio_signed_integer && +audio_bits_per_sample == 24 && +audio_packed) { +return audio_be ? 
AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE; +} else if (basic_desc->mFormatID == kAudioFormatLinearPCM && +audio_signed_integer && +audio_bits_per_sample == 32 && +audio_packed) { +return audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE; +} else { +return AV_CODEC_ID_NONE; +} +} + static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device) { AVFContext *ctx = (AVFContext*)s->priv_data; NSError *error = nil; AVCaptureDeviceInput* audio_dev_input = [[[AVCaptureDeviceInput alloc] initWithDevice:audio_device error:&error] autorelease]; dispatch_queue_t queue; +NSObject *format = nil; +const AudioStreamBasicDescription *format_desc = NULL; if (!audio_dev_input) { av_log(s, AV_LOG_ERROR, "Failed to create AV capture input device: %s\n", @@ -660,6 +692,61 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device) return 1; } +#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070 + +for (format in [audio_device valueForKey:@"formats"]) { +CMFormatDescriptionRef formatDescription; +formatDescription = (CMFormatDescriptionRef) [format performSelector:@selector(formatDescription)]; +const AudioStreamBasicDescription *desc = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription); + +if (desc->mSampleRate == ctx->audio_sample_rate) { +format_desc = desc; +break; +} +} + +if(!format_desc) { +av_log(s, AV_LOG_ERROR, "Selected audio sample rate (%d Hz) is not supported\n", ctx->audio_sample_rate); +av_log(s, AV_LOG_ERROR, "Supported audio formats:\n"); +for (format in [audio_device valueForKey:@"formats"]) { +const char *codec_name; +CMFormatDescriptionRef formatDescription; +formatDescription = (CMFormatDescriptionRef) [format performSelector:@selector(formatDescription)]; +const AudioStreamBasicDescription *desc = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription); + +enum AVCodecID codec_id = find_audio_codec_id(desc); +if (codec_id == AV_CODEC_ID_NONE) { +continue; +} + +codec_name 
= avcodec_get_name(codec_id); +av_log(s, AV_LOG_ERROR, " %s, %d ch, %0.0f Hz \n", codec_name, desc->mChannelsPerFrame, desc->mSampleRate); +} + +format_desc = CMAudioFormatDescriptionGetStreamBasicDescription(audio_device.activeFormat.formatDescription); +if (format_desc) +av_log(s, AV_LOG_WARNING, "Overriding selected sample rate with active sample rate: %0.0f Hz instead\n", format_desc->mSa
[FFmpeg-devel] [PATCH v2 1/2] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive
From: Mark Reid This fixes audio issues I've had with some capture devices. The audio gets really choppy and stops working. This seems to be because avf_read_packet stops outputting the audio frames because a video frame happens to be available first. It base on the approach used in a patch from #4437 https://trac.ffmpeg.org/ticket/4437 My approach uses an AVFifoBuffer instead of NSMutableArray and also outputs the packets in the same order they arrive from AVFFoundation. should fix ticket #4437 and #4513 --- libavdevice/avfoundation.m | 160 - 1 file changed, 124 insertions(+), 36 deletions(-) diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m index 59d5b0af4f..5ac6ec4183 100644 --- a/libavdevice/avfoundation.m +++ b/libavdevice/avfoundation.m @@ -31,13 +31,17 @@ #include "libavutil/pixdesc.h" #include "libavutil/opt.h" #include "libavutil/avstring.h" +#include "libavutil/avassert.h" #include "libavformat/internal.h" #include "libavutil/internal.h" #include "libavutil/parseutils.h" #include "libavutil/time.h" #include "libavutil/imgutils.h" +#include "libavutil/fifo.h" #include "avdevice.h" +#define FIFO_SIZE 4 + static const int avf_time_base = 100; static const AVRational avf_time_base_q = { @@ -128,8 +132,8 @@ typedef struct AVCaptureSession *capture_session; AVCaptureVideoDataOutput *video_output; AVCaptureAudioDataOutput *audio_output; -CMSampleBufferRef current_frame; -CMSampleBufferRef current_audio_frame; +AVFifoBuffer *video_fifo; +AVFifoBuffer *audio_fifo; AVCaptureDevice *observed_device; #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070 @@ -138,6 +142,11 @@ typedef struct int observed_quit; } AVFContext; +typedef struct { +int64_t ts; +CMSampleBufferRef frame; +} BufferRef; + static void lock_frames(AVFContext* ctx) { pthread_mutex_lock(&ctx->frame_lock); @@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx) pthread_mutex_unlock(&ctx->frame_lock); } +static inline void fifo_write(AVFifoBuffer* f, int64_t ts, 
CMSampleBufferRef frame) +{ +BufferRef buf = { +.ts= ts, +.frame = frame, +}; + +CFRetain(frame); +av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL); +} + +static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf) +{ +if (av_fifo_size(f)) { +av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL); +return; +} +buf->frame = nil; +return; +} + +static inline void fifo_drain(AVFifoBuffer* f, int release) +{ +av_assert2(av_fifo_size(f) >= sizeof(BufferRef)); +if (release) { +BufferRef buf; +fifo_peek(f, &buf); +CFRelease(buf.frame); +} +av_fifo_drain(f, sizeof(BufferRef)); +} + +static inline void fifo_freep(AVFifoBuffer **f) +{ +if (f) { +while (av_fifo_size(*f)) { +fifo_drain(*f, 1); +} +av_fifo_freep(f); +} +} + /** FrameReciever class - delegate for AVCaptureSession */ @interface AVFFrameReceiver : NSObject @@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx) didOutputSampleBuffer:(CMSampleBufferRef)videoFrame fromConnection:(AVCaptureConnection *)connection { +AVFifoBuffer *fifo = _context->video_fifo; +int64_t ts = av_gettime_relative(); lock_frames(_context); -if (_context->current_frame != nil) { -CFRelease(_context->current_frame); +if (av_fifo_space(fifo) == 0) { +av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n"); +fifo_drain(fifo, 1); } -_context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame); +fifo_write(fifo, ts, videoFrame); unlock_frames(_context); @@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx) didOutputSampleBuffer:(CMSampleBufferRef)audioFrame fromConnection:(AVCaptureConnection *)connection { +AVFifoBuffer *fifo = _context->audio_fifo; +int64_t ts = av_gettime_relative(); lock_frames(_context); -if (_context->current_audio_frame != nil) { -CFRelease(_context->current_audio_frame); +if (!av_fifo_space(fifo)) { +av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n"); +fifo_drain(fifo, 1); } -_context->current_audio_frame = 
(CMSampleBufferRef)CFRetain(audioFrame); +fifo_write(fifo, ts, audioFrame); unlock_frames(_context); @@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx) ctx->avf_audio_delegate = NULL; av_freep(&ctx->audio_buffer); +fifo_freep(&ctx->video_fifo); +fifo_freep(&ctx->audio_fifo); pthread_mutex_destroy(&ctx->frame_lock); - -if (ctx->current_frame) { -CFRelease(ctx->current_frame); -} } static void parse_device_name(AVFormatContext *s)
[FFmpeg-devel] [PATCH] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive
From: Mark Reid Hi, This patch fixes audio issues I've had with some capture devices. The audio gets really choppy and stops working. This seems to be because avf_read_packet stops outputting the audio frames because a video frame happens to be available first. It base on the approach used in a patch from #4437 https://trac.ffmpeg.org/ticket/4437 My approach uses an AVFifoBuffer instead of NSMutableArray and also outputs the packets in the same order they arrive from AVFFoundation. should fix ticket #4437 and #4513 --- libavdevice/avfoundation.m | 160 - 1 file changed, 124 insertions(+), 36 deletions(-) diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m index 59d5b0af4f..5ac6ec4183 100644 --- a/libavdevice/avfoundation.m +++ b/libavdevice/avfoundation.m @@ -31,13 +31,17 @@ #include "libavutil/pixdesc.h" #include "libavutil/opt.h" #include "libavutil/avstring.h" +#include "libavutil/avassert.h" #include "libavformat/internal.h" #include "libavutil/internal.h" #include "libavutil/parseutils.h" #include "libavutil/time.h" #include "libavutil/imgutils.h" +#include "libavutil/fifo.h" #include "avdevice.h" +#define FIFO_SIZE 4 + static const int avf_time_base = 100; static const AVRational avf_time_base_q = { @@ -128,8 +132,8 @@ typedef struct AVCaptureSession *capture_session; AVCaptureVideoDataOutput *video_output; AVCaptureAudioDataOutput *audio_output; -CMSampleBufferRef current_frame; -CMSampleBufferRef current_audio_frame; +AVFifoBuffer *video_fifo; +AVFifoBuffer *audio_fifo; AVCaptureDevice *observed_device; #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070 @@ -138,6 +142,11 @@ typedef struct int observed_quit; } AVFContext; +typedef struct { +int64_t ts; +CMSampleBufferRef frame; +} BufferRef; + static void lock_frames(AVFContext* ctx) { pthread_mutex_lock(&ctx->frame_lock); @@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx) pthread_mutex_unlock(&ctx->frame_lock); } +static inline void fifo_write(AVFifoBuffer* f, 
int64_t ts, CMSampleBufferRef frame) +{ +BufferRef buf = { +.ts= ts, +.frame = frame, +}; + +CFRetain(frame); +av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL); +} + +static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf) +{ +if (av_fifo_size(f)) { +av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL); +return; +} +buf->frame = nil; +return; +} + +static inline void fifo_drain(AVFifoBuffer* f, int release) +{ +av_assert2(av_fifo_size(f) >= sizeof(BufferRef)); +if (release) { +BufferRef buf; +fifo_peek(f, &buf); +CFRelease(buf.frame); +} +av_fifo_drain(f, sizeof(BufferRef)); +} + +static inline void fifo_freep(AVFifoBuffer **f) +{ +if (f) { +while (av_fifo_size(*f)) { +fifo_drain(*f, 1); +} +av_fifo_freep(f); +} +} + /** FrameReciever class - delegate for AVCaptureSession */ @interface AVFFrameReceiver : NSObject @@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx) didOutputSampleBuffer:(CMSampleBufferRef)videoFrame fromConnection:(AVCaptureConnection *)connection { +AVFifoBuffer *fifo = _context->video_fifo; +int64_t ts = av_gettime_relative(); lock_frames(_context); -if (_context->current_frame != nil) { -CFRelease(_context->current_frame); +if (av_fifo_space(fifo) == 0) { +av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n"); +fifo_drain(fifo, 1); } -_context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame); +fifo_write(fifo, ts, videoFrame); unlock_frames(_context); @@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx) didOutputSampleBuffer:(CMSampleBufferRef)audioFrame fromConnection:(AVCaptureConnection *)connection { +AVFifoBuffer *fifo = _context->audio_fifo; +int64_t ts = av_gettime_relative(); lock_frames(_context); -if (_context->current_audio_frame != nil) { -CFRelease(_context->current_audio_frame); +if (!av_fifo_space(fifo)) { +av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n"); +fifo_drain(fifo, 1); } -_context->current_audio_frame 
= (CMSampleBufferRef)CFRetain(audioFrame); +fifo_write(fifo, ts, audioFrame); unlock_frames(_context); @@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx) ctx->avf_audio_delegate = NULL; av_freep(&ctx->audio_buffer); +fifo_freep(&ctx->video_fifo); +fifo_freep(&ctx->audio_fifo); pthread_mutex_destroy(&ctx->frame_lock); - -if (ctx->current_frame) { -CFRelease(ctx->current_frame); -} } static void parse_device_name(AVForma
[FFmpeg-devel] [PATCH v2] avformat/mov: fix timecode with counter mode flag set
From: Mark Reid The current behaviour ends up squaring the avg_frame_rate if the counter mode flag is set. This messes up the timecode calculation, and looks to me like a regression that seems to have been introduced in 428b4aac. Upon further testing it seems that no special case is needed for having the counter flag set. av_timecode_init appears to handle the timecode correctly, at least in the sample files I have. Here is a sample mov file with the counter flag set https://www.dropbox.com/s/5l4fucb9lhq523s/timecode_counter_mode.mov before the patch ffmpeg will report the timecode as: 00:37:11:97 and warns that the timecode framerate is 57600/1002001 after patch: 14:50:55:02 --- libavformat/mov.c | 13 - 1 file changed, 13 deletions(-) diff --git a/libavformat/mov.c b/libavformat/mov.c index 3215b53636..f8856a43dd 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -2350,19 +2350,6 @@ FF_DISABLE_DEPRECATION_WARNINGS st->codec->time_base = av_inv_q(st->avg_frame_rate); FF_ENABLE_DEPRECATION_WARNINGS #endif -/* adjust for per frame dur in counter mode */ -if (tmcd_ctx->tmcd_flags & 0x0008) { -int timescale = AV_RB32(st->codecpar->extradata + 8); -int framedur = AV_RB32(st->codecpar->extradata + 12); -st->avg_frame_rate.num *= timescale; -st->avg_frame_rate.den *= framedur; -#if FF_API_LAVF_AVCTX -FF_DISABLE_DEPRECATION_WARNINGS -st->codec->time_base.den *= timescale; -st->codec->time_base.num *= framedur; -FF_ENABLE_DEPRECATION_WARNINGS -#endif -} if (size > 30) { uint32_t len = AV_RB32(st->codecpar->extradata + 18); /* name atom length */ uint32_t format = AV_RB32(st->codecpar->extradata + 22); -- 2.21.1 (Apple Git-122.3) ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/1] avformat/mov: fix timecode with counter mode flag set
From: Mark Reid The current behaviour ends up squaring the avg_frame_rate if the counter mode flag is set. This messes up the timecode calculation, and looks to me like a regression that seems to have been introduced in 428b4aac. The new behaviour is to use the "Number of frames" field for avg_frame_rate from the timecode atom as described here: https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html#//apple_ref/doc/uid/TP4939-CH205-69831 Number of frames An 8-bit integer that contains the number of frames per second for the timecode format. If the time is a counter, this is the number of frames for each counter tick. Here is a sample mov file with the counter flag set https://www.dropbox.com/s/5l4fucb9lhq523s/timecode_counter_mode.mov before the patch ffmpeg will report the timecode as: 00:37:11:97 and warns that the timecode framerate is 57600/1002001 after patch: 14:50:55:02 --- libavformat/mov.c | 17 + 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/libavformat/mov.c b/libavformat/mov.c index 2b90e31170..76c1ceb82a 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -2336,24 +2336,17 @@ static int mov_parse_stsd_data(MOVContext *c, AVIOContext *pb, tmcd_ctx->tmcd_flags = val; st->avg_frame_rate.num = AV_RB32(st->codecpar->extradata + 8); /* timescale */ st->avg_frame_rate.den = AV_RB32(st->codecpar->extradata + 12); /* frameDuration */ -#if FF_API_LAVF_AVCTX -FF_DISABLE_DEPRECATION_WARNINGS -st->codec->time_base = av_inv_q(st->avg_frame_rate); -FF_ENABLE_DEPRECATION_WARNINGS -#endif + /* adjust for per frame dur in counter mode */ if (tmcd_ctx->tmcd_flags & 0x0008) { -int timescale = AV_RB32(st->codecpar->extradata + 8); -int framedur = AV_RB32(st->codecpar->extradata + 12); -st->avg_frame_rate.num *= timescale; -st->avg_frame_rate.den *= framedur; +st->avg_frame_rate.num = st->codecpar->extradata[16] /* fps, frames per counter tick in counter mode */; +st->avg_frame_rate.den = 1; +} #if FF_API_LAVF_AVCTX 
FF_DISABLE_DEPRECATION_WARNINGS -st->codec->time_base.den *= timescale; -st->codec->time_base.num *= framedur; +st->codec->time_base = av_inv_q(st->avg_frame_rate); FF_ENABLE_DEPRECATION_WARNINGS #endif -} if (size > 30) { uint32_t len = AV_RB32(st->codecpar->extradata + 18); /* name atom length */ uint32_t format = AV_RB32(st->codecpar->extradata + 22); -- 2.29.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avcodec/exr: preserve half-float NaN bits and add fate test
From: Mark Reid Hi, This patch handles NaNs more like the official implementation handles them, preserving the original bits. https://github.com/AcademySoftwareFoundation/openexr/blob/RB-2.5/IlmBase/Half/toFloat.cpp#L111 It also adds a fate test that is a 256x256 exr containing all possible 16-bit half-float values. Here is a link to download the fate test file, if someone could add it to fate for me https://www.dropbox.com/s/2q4jg8w489aunsf/rgb_scanline_zip_half_float_0x0_to_0x.exr --- libavcodec/exr.c| 3 +-- tests/fate/image.mak| 2 ++ tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x | 6 ++ 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x diff --git a/libavcodec/exr.c b/libavcodec/exr.c index d233dd43fb..6e6ce4275c 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -193,8 +193,7 @@ static union av_intfloat32 exr_half2float(uint16_t hf) // half-float NaNs will be converted to a single precision NaN // half-float Infs will be converted to a single precision Inf exp = FLOAT_MAX_BIASED_EXP; -if (mantissa) -mantissa = (1 << 23) - 1;// set all bits to indicate a NaN +mantissa <<= 13; // preserve half-float NaN bits if set } else if (exp == 0x0) { // convert half-float zero/denorm to single precision value if (mantissa) { diff --git a/tests/fate/image.mak b/tests/fate/image.mak index 22072a62f1..c453f0f79c 100644 --- a/tests/fate/image.mak +++ b/tests/fate/image.mak @@ -317,6 +317,8 @@ fate-exr-rgb-scanline-half-zip-dw-outside: CMD = framecrc -i $(TARGET_SAMPLES)/e FATE_EXR += fate-exr-rgb-tile-half-zip-dw-outside fate-exr-rgb-tile-half-zip-dw-outside: CMD = framecrc -i $(TARGET_SAMPLES)/exr/rgb_tile_half_zip_dw_outside.exr -pix_fmt gbrpf32le +FATE_EXR += fate-exr-rgb-scanline-zip-half-0x0-0x +fate-exr-rgb-scanline-zip-half-0x0-0x: CMD = framecrc -i $(TARGET_SAMPLES)/exr/rgb_scanline_zip_half_float_0x0_to_0x.exr -pix_fmt gbrpf32le FATE_EXR-$(call DEMDEC, IMAGE2, EXR) += $(FATE_EXR) diff --git 
a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x new file mode 100644 index 00..b6201116fe --- /dev/null +++ b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x @@ -0,0 +1,6 @@ +#tb 0: 1/25 +#media_type 0: video +#codec_id 0: rawvideo +#dimensions 0: 256x256 +#sar 0: 1/1 +0, 0, 0,1, 786432, 0x1445e411 -- 2.29.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/1] avcodec/exr: use lookuptable for alpha if there is no trc_func
From: Mark Reid --- libavcodec/exr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index cf7824402a..e907c5c464 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -1203,7 +1203,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata, } } else if (s->pixel_type == EXR_HALF) { // 16-bit -if (c < 3) { +if (c < 3 || !trc_func) { for (x = 0; x < xsize; x++) { *ptr_x++ = s->gamma_table[bytestream_get_le16(&src)]; } -- 2.27.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/1] avfilter/vf_lut3d: fix sanitizef INF handling
From: Mark Reid --- libavfilter/vf_lut3d.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c index 988f6c8b55..172d6df0c8 100644 --- a/libavfilter/vf_lut3d.c +++ b/libavfilter/vf_lut3d.c @@ -107,7 +107,7 @@ typedef struct ThreadData { #define EXPONENT_MASK 0x7F80 #define MANTISSA_MASK 0x007F -#define SIGN_MASK 0x7FFF +#define SIGN_MASK 0x8000 static inline float sanitizef(float f) { @@ -120,7 +120,7 @@ static inline float sanitizef(float f) return 0.0f; } else if (t.i & SIGN_MASK) { // -INF -return FLT_MIN; +return -FLT_MAX; } else { // +INF return FLT_MAX; -- 2.27.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libswscale/input: use more accurate planar rgb16 yuv conversions
From: Mark Reid These conversions appear to exhibit the same rounding error that the rgbf32 formats did. I separated the rounding value from the 16 and 128 offsets; I think it makes it a little clearer. --- libswscale/input.c | 6 ++-- tests/ref/fate/filter-pixfmts-scale| 32 +++--- tests/ref/fate/psd-rgb48 | 2 +- tests/ref/fate/psd-rgba64 | 2 +- tests/ref/fate/sws-floatimg-cmp| 32 +++--- tests/ref/vsynth/vsynth1-ffv1-v3-rgb48 | 4 +-- tests/ref/vsynth/vsynth1-r210 | 4 +-- tests/ref/vsynth/vsynth2-ffv1-v3-rgb48 | 4 +-- tests/ref/vsynth/vsynth2-r210 | 4 +-- tests/ref/vsynth/vsynth3-ffv1-v3-rgb48 | 4 +-- tests/ref/vsynth/vsynth3-r210 | 4 +-- tests/ref/vsynth/vsynth_lena-ffv1-v3-rgb48 | 4 +-- tests/ref/vsynth/vsynth_lena-r210 | 4 +-- 13 files changed, 53 insertions(+), 53 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 67a85b0418..6850801a44 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -924,7 +924,7 @@ static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_sr int b = rdpx(src[1] + i); int r = rdpx(src[2] + i); -dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14)); +dst[i] = (ry*r + gy*g + by*b + (16 << (RGB2YUV_SHIFT + bpc - 8)) + (1 << (RGB2YUV_SHIFT + shift - 15))) >> (RGB2YUV_SHIFT + shift - 14); } } @@ -957,8 +957,8 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, int b = rdpx(src[1] + i); int r = rdpx(src[2] + i); -dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); -dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); +dstU[i] = (ru*r + gu*g + bu*b + (128 << (RGB2YUV_SHIFT + bpc - 8)) + (1 << (RGB2YUV_SHIFT + shift - 15))) >> (RGB2YUV_SHIFT + shift - 14); +dstV[i] = (rv*r + gv*g + bv*b + (128 << (RGB2YUV_SHIFT + bpc - 8)) + (1 << (RGB2YUV_SHIFT + shift - 15))) >> (RGB2YUV_SHIFT + shift - 14); } } #undef rdpx diff --git 
a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale index 30e7cd5b06..f47c9b887f 100644 --- a/tests/ref/fate/filter-pixfmts-scale +++ b/tests/ref/fate/filter-pixfmts-scale @@ -19,25 +19,25 @@ bgrad8316272bc3a360ef9dff3ecc84520a3 bgra64be4e6a1b9f9c18b881c27d76611d45f737 bgra64leefeee0abcc658ebcff049d5e74d74943 gbrap 4a100f750ac846b34bfeef0d6893c3de -gbrap10be 6d89abb9248006c3e9017545e9474654 -gbrap10le cf974e23f485a10740f5de74a5c8c3df -gbrap12be 1d9b57766ba9c2192403f43967cb9af0 -gbrap12le bb1ba1c157717db3dd612a76d38a018e -gbrap16be c72b935a6e57a8e1c37bff08c2db55b1 -gbrap16le 13eb0e62b1ac9c1c86c81521eaefab5f +gbrap10be dc6aea3559ea4fcdda1ccc4f23d2f2fb +gbrap10le 6e1cba57029fdf0f9d46b5e5cd55112b +gbrap12be dbe3a662c016563529032cd4dfb80262 +gbrap12le 24f5ecb32435b73353517e017c165e31 +gbrap16be 31968e6872a46e8174fb57f8920ed10d +gbrap16le 8c6758f33671b673b6d30969fc05a23d gbrapf32be 366b804d5697276e8c481c4bdf05a00b gbrapf32le 558a268e6d6b907449d1056afab78f29 gbrpdc3387f925f972c61aae7eb23cdc19f0 -gbrp10be0277d4c3a8498d75e2783fb81379e481 -gbrp10lef3d70f8ab845c3c9b8f7452e4a6e285a -gbrp12befbd4e149c452c351c6d1c11d6b6e176a -gbrp12lec51d51c3b753d735eb22983397262c88 -gbrp14becd20808592e62cc439786c18a14b3e70 -gbrp14le456f7d1ff5990aa6379137d84dd63862 -gbrp16be5fc826cfabebfc1442cb793c4b6303e2 -gbrp16le1b3e0b63d47a3e1b6b20931316883bf2 -gbrp9be d9c88968001e1452ff31fbc8d16b18a0 -gbrp9le 2ccfed0816bf6bd4bb3a5b7591d9603a +gbrp10bea318ea42e53a7b80a55aa7c19c9a0ab5 +gbrp10le994e8fc6a1e5b230f4c55893fd7618d6 +gbrp12bebfbd419dd18c0a5677d4bce55ab24e2e +gbrp12le95af1f8495b2a7a7ad67802e3e8bca01 +gbrp14be9d8113c9a5182c02dbe4576509f59a96 +gbrp14le952f39881e500ed684c8b216185f4b80 +gbrp16be5241eee3465096efa111b86b30c3 +gbrp16le5b8b997378ce31207f37059dbfb40c4a +gbrp9be d7caf58cc3a74a036e11f924f03fc04c +gbrp9le 010f7bcd8b2e17065d01a09f0d483218 gbrpf32be f3d0cefdf11c861001880772d817aac8 gbrpf32le 290468205c1c18a0667edfca45061aee gray221201cc7cfc4964eacd8b3e426fd276 diff --git 
a/tests/ref/fate/psd-rgb48 b/tests/ref/fate/psd-rgb48 index f60cbab078..139b3c1df3 100644 --- a/tests/ref/fate/psd-rgb48 +++ b/tests/ref/fate/psd-rgb48 @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #
[FFmpeg-devel] [PATCH v2 2/2] libswscale/input: use more accurate rgbf32 yuv conversions
From: Mark Reid --- libswscale/input.c | 12 ++- tests/ref/fate/filter-pixfmts-scale | 8 +- tests/ref/fate/sws-floatimg-cmp | 122 ++-- 3 files changed, 70 insertions(+), 72 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 064ed5902f..67a85b0418 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -984,15 +984,14 @@ static av_always_inline void planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV, uint16_t *dstV = (uint16_t *)_dstV; int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; -int bpc = 16; -int shift = 14; + for (i = 0; i < width; i++) { int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i))); int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i))); int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i))); -dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); -dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); +dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } @@ -1003,14 +1002,13 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s uint16_t *dst= (uint16_t *)_dst; int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; -int bpc = 16; -int shift = 14; + for (i = 0; i < width; i++) { int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i))); int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i))); int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i))); -dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14)); +dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale index d7020ad2c3..30e7cd5b06 
100644 --- a/tests/ref/fate/filter-pixfmts-scale +++ b/tests/ref/fate/filter-pixfmts-scale @@ -25,8 +25,8 @@ gbrap12be 1d9b57766ba9c2192403f43967cb9af0 gbrap12le bb1ba1c157717db3dd612a76d38a018e gbrap16be c72b935a6e57a8e1c37bff08c2db55b1 gbrap16le 13eb0e62b1ac9c1c86c81521eaefab5f -gbrapf32be 42e53d9edccbd9e09c4cd78780ba92f3 -gbrapf32le eebf3973ef94c841f0a1ceb1ed61621d +gbrapf32be 366b804d5697276e8c481c4bdf05a00b +gbrapf32le 558a268e6d6b907449d1056afab78f29 gbrpdc3387f925f972c61aae7eb23cdc19f0 gbrp10be0277d4c3a8498d75e2783fb81379e481 gbrp10lef3d70f8ab845c3c9b8f7452e4a6e285a @@ -38,8 +38,8 @@ gbrp16be5fc826cfabebfc1442cb793c4b6303e2 gbrp16le1b3e0b63d47a3e1b6b20931316883bf2 gbrp9be d9c88968001e1452ff31fbc8d16b18a0 gbrp9le 2ccfed0816bf6bd4bb3a5b7591d9603a -gbrpf32be 4614d32e4417f80e0adcc1bdcf6cde42 -gbrpf32le 1366ee77e5559672260bbe51040e28b2 +gbrpf32be f3d0cefdf11c861001880772d817aac8 +gbrpf32le 290468205c1c18a0667edfca45061aee gray221201cc7cfc4964eacd8b3e426fd276 gray10be9452756d0b37f4f5c7cae7635e22d747 gray10le37fd2e1ec6b66410212d39a342e864df diff --git a/tests/ref/fate/sws-floatimg-cmp b/tests/ref/fate/sws-floatimg-cmp index 24204254c4..cf6788fc23 100644 --- a/tests/ref/fate/sws-floatimg-cmp +++ b/tests/ref/fate/sws-floatimg-cmp @@ -1,120 +1,120 @@ gbrpf32le -> yuv444p16le -> gbrpf32le -avg diff: 0.003852 +avg diff: 0.000125 min diff: 0.00 -max diff: 0.006638 +max diff: 0.000501 gbrpf32le -> yuv444p -> gbrpf32le -avg diff: 0.004316 +avg diff: 0.001804 min diff: 0.00 -max diff: 0.012704 +max diff: 0.006399 gbrpf32le -> yuv444p9le -> gbrpf32le -avg diff: 0.004053 -min diff: 0.01 -max diff: 0.009402 +avg diff: 0.000906 +min diff: 0.00 +max diff: 0.003313 gbrpf32le -> yuv444p10le -> gbrpf32le -avg diff: 0.003960 +avg diff: 0.000467 min diff: 0.00 -max diff: 0.008123 +max diff: 0.001912 gbrpf32le -> yuv444p12le -> gbrpf32le -avg diff: 0.003878 +avg diff: 0.000166 min diff: 0.00 -max diff: 0.007011 +max diff: 0.000802 gbrpf32le -> yuv444p14le -> gbrpf32le -avg diff: 
0.003868 +avg diff: 0.000127 min diff: 0.00 -max diff: 0.006729 +max diff: 0.000524 gbrpf32le -> rgb24 -> gbrpf32le -avg diff: 0.004122 +avg diff: 0.001011 min diff: 0.00 -max diff: 0.008975 +max diff: 0.004229 gbrpf32le -> bgr24 -> gbrpf32le -avg diff: 0.004122 +avg diff: 0.001011 min diff: 0.00 -max diff: 0.008975 +max diff: 0.004229 gbrpf32le -> rgba -> gbrpf32le -avg diff: 0.004122 +avg diff: 0.001011 min diff: 0.00 -max diff: 0.008975
[FFmpeg-devel] [PATCH v2 1/2] libswscale/tests: add floatimg_cmp test
From: Mark Reid changes since v1: - made into fate test - fixed c90 warnings - tests more intermediate formats - tested on BE mips too --- libswscale/Makefile | 1 + libswscale/tests/.gitignore | 1 + libswscale/tests/floatimg_cmp.c | 296 tests/fate/libswscale.mak | 4 + tests/ref/fate/sws-floatimg-cmp | 120 + 5 files changed, 422 insertions(+) create mode 100644 libswscale/tests/floatimg_cmp.c create mode 100644 tests/ref/fate/sws-floatimg-cmp diff --git a/libswscale/Makefile b/libswscale/Makefile index 5e03e6fa0a..4b8f9de425 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -25,5 +25,6 @@ OBJS-$(CONFIG_SHARED)+= log2_tab.o SLIBOBJS-$(HAVE_GNU_WINDRES) += swscaleres.o TESTPROGS = colorspace \ +floatimg_cmp\ pixdesc_query \ swscale \ diff --git a/libswscale/tests/.gitignore b/libswscale/tests/.gitignore index 1a26f038c4..c56abf0ee7 100644 --- a/libswscale/tests/.gitignore +++ b/libswscale/tests/.gitignore @@ -1,3 +1,4 @@ /colorspace +/floatimg_cmp /pixdesc_query /swscale diff --git a/libswscale/tests/floatimg_cmp.c b/libswscale/tests/floatimg_cmp.c new file mode 100644 index 00..5c67594fb6 --- /dev/null +++ b/libswscale/tests/floatimg_cmp.c @@ -0,0 +1,296 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include + +#include "libavutil/avutil.h" +#include "libavutil/imgutils.h" +#include "libavutil/intfloat.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/lfg.h" +#include "libavutil/mem.h" +#include "libavutil/parseutils.h" +#include "libavutil/pixdesc.h" + +#include "libswscale/swscale.h" + +#define DEFAULT_W 96 +#define DEFAULT_H 96 + +static const enum AVPixelFormat pix_fmts[] = { +AV_PIX_FMT_YUV444P16LE, +AV_PIX_FMT_YUV444P, +AV_PIX_FMT_YUV444P9LE, AV_PIX_FMT_YUV444P10LE, +AV_PIX_FMT_YUV444P12LE, AV_PIX_FMT_YUV444P14LE, +AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, +AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA, +AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR, +AV_PIX_FMT_0RGB, AV_PIX_FMT_0BGR, +AV_PIX_FMT_RGB0, AV_PIX_FMT_BGR0, +AV_PIX_FMT_RGB48LE, AV_PIX_FMT_BGR48LE, +AV_PIX_FMT_RGBA64LE, AV_PIX_FMT_BGRA64LE, +AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, +AV_PIX_FMT_GBRP9LE, +AV_PIX_FMT_GBRP10LE, AV_PIX_FMT_GBRAP10LE, +AV_PIX_FMT_GBRP12LE, AV_PIX_FMT_GBRAP12LE, +AV_PIX_FMT_GBRP14LE, +AV_PIX_FMT_GBRP16LE, AV_PIX_FMT_GBRAP16LE +}; + +const char *usage = "floatimg_cmp -pixel_format -size -ref \n"; + +int main(int argc, char **argv) +{ +enum AVPixelFormat inFormat = AV_PIX_FMT_NONE; +enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; +const AVPixFmtDescriptor *desc; +uint8_t *ptr; +uint32_t *in, *out; + +uint8_t *rgbIn[4] = {NULL, NULL, NULL, NULL}; +uint8_t *rgbOut[4] = {NULL, NULL, NULL, NULL}; +int rgbStride[4]; + +uint8_t *dst[4] = {NULL, NULL, NULL, NULL}; +int dstStride[4]; + +int i, x, y, p, size, count; +int res = -1; +int w = -1; +int h = -1; +union av_intfloat32 v0, v1; + +double sum; +float minimum, maximum, diff; + +struct SwsContext *sws = NULL; +AVLFG rand; +FILE *fp = NULL; + +for (i = 1; i < argc; i += 2) { +if 
(argv[i][0] != '-' || i + 1 == argc) +goto bad_option; +if (!strcmp(argv[i], "-ref")) { +fp = fopen(argv[i + 1], "rb"); +if (!fp) { +fprintf(stderr, "could not open '%s'\n", argv[i + 1]); +goto end; +} +} else if (!strcmp(argv[i], "-size")) { +res = av_parse_video_size(&w, &h, argv[i + 1]); +if (res < 0) { +fprintf(stderr, "invalid video size %s\n", argv[i + 1]); +goto end; +} +} else if (!strcmp(argv[i], "-pixel_format")) { +inFormat = av_get_pix_fmt(argv[i + 1]); +if (inFormat == AV_PIX_FMT_NONE) { +fprintf(stde
[FFmpeg-devel] [PATCH] libavcodec/exr: fix incorrect translation of denorm mantissa
From: Mark Reid Hi, This fixes a very subtle error that's hard to notice until you unpremultiply an image. This loop is supposed to stop at the first 1, but instead was stopping at the first 0. The comment is correct though! OpenEXR's implementation is very similar. https://github.com/AcademySoftwareFoundation/openexr/blob/master/IlmBase/Half/toFloat.cpp#L85 Not all the exr tests needed to be fixed because only some have denorm values --- libavcodec/exr.c | 2 +- tests/ref/fate/exr-rgba-multiscanline-half-b44 | 2 +- tests/ref/fate/exr-slice-raw | 2 +- tests/ref/fate/exr-slice-rle | 2 +- tests/ref/fate/exr-slice-zip1 | 2 +- tests/ref/fate/exr-slice-zip16 | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 829d38143d..216d216785 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -201,7 +201,7 @@ static union av_intfloat32 exr_half2float(uint16_t hf) mantissa <<= 1; exp = HALF_FLOAT_MIN_BIASED_EXP_AS_SINGLE_FP_EXP; // check for leading 1 in denorm mantissa -while ((mantissa & (1 << 10))) { +while (!(mantissa & (1 << 10))) { // for every leading 0, decrement single precision exponent by 1 // and shift half-float mantissa value to the left mantissa <<= 1; diff --git a/tests/ref/fate/exr-rgba-multiscanline-half-b44 b/tests/ref/fate/exr-rgba-multiscanline-half-b44 index 964bf2e65e..24525b92a7 100644 --- a/tests/ref/fate/exr-rgba-multiscanline-half-b44 +++ b/tests/ref/fate/exr-rgba-multiscanline-half-b44 @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 935x251 #sar 0: 1/1 -0, 0, 0,1, 3754960, 0x4d48a1b2 +0, 0, 0,1, 3754960, 0x8d9af112 diff --git a/tests/ref/fate/exr-slice-raw b/tests/ref/fate/exr-slice-raw index c7096e4d2a..1e7d3825ea 100644 --- a/tests/ref/fate/exr-slice-raw +++ b/tests/ref/fate/exr-slice-raw @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 587x675 #sar 0: 1/1 -0, 0, 0,1, 6339600, 0x4f2b496b +0, 0, 0,1, 6339600, 0xda3e31df diff --git a/tests/ref/fate/exr-slice-rle b/tests/ref/fate/exr-slice-rle index 
c7096e4d2a..1e7d3825ea 100644 --- a/tests/ref/fate/exr-slice-rle +++ b/tests/ref/fate/exr-slice-rle @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 587x675 #sar 0: 1/1 -0, 0, 0,1, 6339600, 0x4f2b496b +0, 0, 0,1, 6339600, 0xda3e31df diff --git a/tests/ref/fate/exr-slice-zip1 b/tests/ref/fate/exr-slice-zip1 index c7096e4d2a..1e7d3825ea 100644 --- a/tests/ref/fate/exr-slice-zip1 +++ b/tests/ref/fate/exr-slice-zip1 @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 587x675 #sar 0: 1/1 -0, 0, 0,1, 6339600, 0x4f2b496b +0, 0, 0,1, 6339600, 0xda3e31df diff --git a/tests/ref/fate/exr-slice-zip16 b/tests/ref/fate/exr-slice-zip16 index c7096e4d2a..1e7d3825ea 100644 --- a/tests/ref/fate/exr-slice-zip16 +++ b/tests/ref/fate/exr-slice-zip16 @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 587x675 #sar 0: 1/1 -0, 0, 0,1, 6339600, 0x4f2b496b +0, 0, 0,1, 6339600, 0xda3e31df -- 2.27.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avfilter/vf_premultiply: add missing AV_PIX_FMT_YUVA444P12
From: Mark Reid query_formats says it's supported, but it is missing from the switch statement, leading to a segfault --- libavfilter/vf_premultiply.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavfilter/vf_premultiply.c b/libavfilter/vf_premultiply.c index 7e5b2aa97f..e051cadac0 100644 --- a/libavfilter/vf_premultiply.c +++ b/libavfilter/vf_premultiply.c @@ -546,6 +546,7 @@ static int filter_frame(AVFilterContext *ctx, case AV_PIX_FMT_YUV444P10: case AV_PIX_FMT_YUVA444P10: case AV_PIX_FMT_YUV444P12: +case AV_PIX_FMT_YUVA444P12: case AV_PIX_FMT_YUV444P14: case AV_PIX_FMT_YUV444P16: case AV_PIX_FMT_YUVA444P16: @@ -597,6 +598,7 @@ static int filter_frame(AVFilterContext *ctx, case AV_PIX_FMT_YUV444P10: case AV_PIX_FMT_YUVA444P10: case AV_PIX_FMT_YUV444P12: +case AV_PIX_FMT_YUVA444P12: case AV_PIX_FMT_YUV444P14: case AV_PIX_FMT_YUV444P16: case AV_PIX_FMT_YUVA444P16: -- 2.27.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 0/2] libswscale/input: fix incorrect rgbf32 yuv conversions
From: Mark Reid Hi, I'm not sure how I didn't notice this before, but the f32 to yuv conversion is slightly off. The following 2 patches fix it. In doing this I found that one of the exr tests probably wasn't testing the right layer, so I fixed that first. Mark Reid (2): fate: use correct uint32 layer libswscale/input: fix incorrect rgbf32 yuv conversions libswscale/input.c | 12 +--- tests/fate/image.mak | 2 +- .../ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 | 2 +- tests/ref/fate/filter-pixfmts-scale | 8 4 files changed, 11 insertions(+), 13 deletions(-) -- 2.27.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] libswscale/input: fix incorrect rgbf32 yuv conversions
From: Mark Reid --- libswscale/input.c | 12 +--- tests/ref/fate/filter-pixfmts-scale | 8 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 064ed5902f..67a85b0418 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -984,15 +984,14 @@ static av_always_inline void planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV, uint16_t *dstV = (uint16_t *)_dstV; int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; -int bpc = 16; -int shift = 14; + for (i = 0; i < width; i++) { int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i))); int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i))); int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i))); -dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); -dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); +dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; +dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } @@ -1003,14 +1002,13 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s uint16_t *dst= (uint16_t *)_dst; int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; -int bpc = 16; -int shift = 14; + for (i = 0; i < width; i++) { int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i))); int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i))); int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i))); -dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14)); +dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale index d7020ad2c3..30e7cd5b06 100644 --- 
a/tests/ref/fate/filter-pixfmts-scale +++ b/tests/ref/fate/filter-pixfmts-scale @@ -25,8 +25,8 @@ gbrap12be 1d9b57766ba9c2192403f43967cb9af0 gbrap12le bb1ba1c157717db3dd612a76d38a018e gbrap16be c72b935a6e57a8e1c37bff08c2db55b1 gbrap16le 13eb0e62b1ac9c1c86c81521eaefab5f -gbrapf32be 42e53d9edccbd9e09c4cd78780ba92f3 -gbrapf32le eebf3973ef94c841f0a1ceb1ed61621d +gbrapf32be 366b804d5697276e8c481c4bdf05a00b +gbrapf32le 558a268e6d6b907449d1056afab78f29 gbrpdc3387f925f972c61aae7eb23cdc19f0 gbrp10be0277d4c3a8498d75e2783fb81379e481 gbrp10lef3d70f8ab845c3c9b8f7452e4a6e285a @@ -38,8 +38,8 @@ gbrp16be5fc826cfabebfc1442cb793c4b6303e2 gbrp16le1b3e0b63d47a3e1b6b20931316883bf2 gbrp9be d9c88968001e1452ff31fbc8d16b18a0 gbrp9le 2ccfed0816bf6bd4bb3a5b7591d9603a -gbrpf32be 4614d32e4417f80e0adcc1bdcf6cde42 -gbrpf32le 1366ee77e5559672260bbe51040e28b2 +gbrpf32be f3d0cefdf11c861001880772d817aac8 +gbrpf32le 290468205c1c18a0667edfca45061aee gray221201cc7cfc4964eacd8b3e426fd276 gray10be9452756d0b37f4f5c7cae7635e22d747 gray10le37fd2e1ec6b66410212d39a342e864df -- 2.27.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] fate: use correct uint32 layer
From: Mark Reid --- tests/fate/image.mak | 2 +- tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/fate/image.mak b/tests/fate/image.mak index 756d01c667..69b4ea5431 100644 --- a/tests/fate/image.mak +++ b/tests/fate/image.mak @@ -221,7 +221,7 @@ FATE_EXR += fate-exr-rgb-scanline-pxr24-float-half-l2 fate-exr-rgb-scanline-pxr24-float-half-l2: CMD = framecrc -layer "VRaySamplerInfo" -i $(TARGET_SAMPLES)/exr/rgb_scanline_pxr24_float_half.exr -pix_fmt gbrapf32le FATE_EXR += fate-exr-rgb-scanline-pxr24-half-uint32-13x9 -fate-exr-rgb-scanline-pxr24-half-uint32-13x9: CMD = framecrc -i $(TARGET_SAMPLES)/exr/rgb_scanline_pxr24_half_uint32_13x9.exr -pix_fmt rgb48le -vf scale +fate-exr-rgb-scanline-pxr24-half-uint32-13x9: CMD = framecrc -layer "VRaySamplerInfo" -i $(TARGET_SAMPLES)/exr/rgb_scanline_pxr24_half_uint32_13x9.exr -pix_fmt rgb48le -vf scale FATE_EXR += fate-exr-rgb-scanline-zip-half-float-l1 fate-exr-rgb-scanline-zip-half-float-l1: CMD = framecrc -i $(TARGET_SAMPLES)/exr/rgb_scanline_zip_half_float.exr -pix_fmt gbrpf32le diff --git a/tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 b/tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 index 523ed9c88b..2d209d8a63 100644 --- a/tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 +++ b/tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 13x9 #sar 0: 9/10 -0, 0, 0,1, 702, 0x68c1450d +0, 0, 0,1, 702, 0x86132f10 -- 2.27.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/1] avfilter/vf_premultiply: add support for gbrapf32 format
From: Mark Reid --- libavfilter/vf_premultiply.c | 118 ++- 1 file changed, 115 insertions(+), 3 deletions(-) diff --git a/libavfilter/vf_premultiply.c b/libavfilter/vf_premultiply.c index 5d053b1f77..7e5b2aa97f 100644 --- a/libavfilter/vf_premultiply.c +++ b/libavfilter/vf_premultiply.c @@ -73,7 +73,7 @@ static int query_formats(AVFilterContext *ctx) AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV444P16, AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, -AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, +AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16, AV_PIX_FMT_NONE }; @@ -82,7 +82,7 @@ static int query_formats(AVFilterContext *ctx) AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16, AV_PIX_FMT_GBRAP, -AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, +AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, AV_PIX_FMT_GBRAPF32, AV_PIX_FMT_NONE }; @@ -218,6 +218,54 @@ static void premultiply16offset(const uint8_t *mmsrc, const uint8_t *aasrc, } } +static void premultiplyf32(const uint8_t *mmsrc, const uint8_t *aasrc, + uint8_t *ddst, + ptrdiff_t mlinesize, ptrdiff_t alinesize, + ptrdiff_t dlinesize, + int w, int h, + int half, int shift, int offset) +{ +const float *msrc = (const float *)mmsrc; +const float *asrc = (const float *)aasrc; +float *dst = (float *)ddst; +int x, y; + +for (y = 0; y < h; y++) { +for (x = 0; x < w; x++) { +dst[x] = msrc[x] * asrc[x]; +} + +dst += dlinesize / 4; +msrc += mlinesize / 4; +asrc += alinesize / 4; +} +} + +static void premultiplyf32offset(const uint8_t *mmsrc, const uint8_t *aasrc, +uint8_t *ddst, +ptrdiff_t mlinesize, ptrdiff_t alinesize, +ptrdiff_t dlinesize, +int w, int h, +int half, int shift, int offset) +{ +const float *msrc = (const float *)mmsrc; +const float *asrc = (const float *)aasrc; 
+float *dst = (float *)ddst; +int x, y; + +float offsetf = offset / 65535.0f; + +for (y = 0; y < h; y++) { +for (x = 0; x < w; x++) { +dst[x] = ((msrc[x] - offsetf) * asrc[x]) + offsetf; +} + +dst += dlinesize / 4; +msrc += mlinesize / 4; +asrc += alinesize / 4; +} +} + static void unpremultiply8(const uint8_t *msrc, const uint8_t *asrc, uint8_t *dst, ptrdiff_t mlinesize, ptrdiff_t alinesize, @@ -365,6 +413,62 @@ static void unpremultiply16offset(const uint8_t *mmsrc, const uint8_t *aasrc, } } +static void unpremultiplyf32(const uint8_t *mmsrc, const uint8_t *aasrc, +uint8_t *ddst, +ptrdiff_t mlinesize, ptrdiff_t alinesize, +ptrdiff_t dlinesize, +int w, int h, +int half, int max, int offset) +{ +const float *msrc = (const float *)mmsrc; +const float *asrc = (const float *)aasrc; + +float *dst = (float *)ddst; +int x, y; + +for (y = 0; y < h; y++) { +for (x = 0; x < w; x++) { +if (asrc[x] > 0.0f) +dst[x] = msrc[x] / asrc[x]; +else +dst[x] = msrc[x]; +} + +dst += dlinesize / 4; +msrc += mlinesize / 4; +asrc += alinesize / 4; +} +} + +static void unpremultiplyf32offset(const uint8_t *mmsrc, const uint8_t *aasrc, +uint8_t *ddst, +ptrdiff_t mlinesize, ptrdiff_t alinesize, +ptrdiff_t dlinesize, +int w, int h, +int half, int max, int offset) +{ +const float *msrc = (const float *)mmsrc; +const float *asrc = (const float *)aasrc; + +float *dst = (float *)ddst; +int x, y; + +float offsetf = offset / 65535.0f; + +for (y = 0; y < h; y++) { +for (x = 0; x < w; x++) { +if (asrc[x] > 0.0f) +dst[x] = (msrc[x] - offsetf) / asrc[x] + offsetf; +else +dst[x] = msrc[x]; +} + +dst += dlinesize / 4; +msrc += mlinesize / 4; +asrc += alinesize / 4; +} +} + static int premultiply_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { PreMultiplyContext *s = ctx->pri
[FFmpeg-devel] [PATCH 1/1] avcodec/exr: add support for data windows larger than or outside the display window
From: Mark Reid Hi, The following patch adds exr support for data windows that are larger or outside the display window. This adds support for both scanline and tiled formats. Here are the added exr files for the fate tests. Could somebody upload them to fate for me? https://www.dropbox.com/s/m0941dmjoejskp0/exr_datawindow_fate_files.zip --- libavcodec/exr.c | 110 ++ tests/fate/image.mak | 31 + .../fate/exr-rgb-scanline-float-zip-dw-large | 6 + .../fate/exr-rgb-scanline-half-piz-dw-large | 6 + .../fate/exr-rgb-scanline-half-zip-dw-large | 6 + .../fate/exr-rgb-scanline-half-zip-dw-outside | 6 + .../fate/exr-rgb-scanline-uint32-piz-dw-large | 6 + tests/ref/fate/exr-rgb-tile-half-piz-dw-large | 6 + tests/ref/fate/exr-rgb-tile-half-zip | 6 + .../ref/fate/exr-rgb-tile-half-zip-dw-outside | 6 + .../ref/fate/exr-rgb-tile-uint32-piz-dw-large | 6 + tests/ref/fate/exr-ya-scanline-zip-half-12x8 | 6 + 12 files changed, 155 insertions(+), 46 deletions(-) create mode 100644 tests/ref/fate/exr-rgb-scanline-float-zip-dw-large create mode 100644 tests/ref/fate/exr-rgb-scanline-half-piz-dw-large create mode 100644 tests/ref/fate/exr-rgb-scanline-half-zip-dw-large create mode 100644 tests/ref/fate/exr-rgb-scanline-half-zip-dw-outside create mode 100644 tests/ref/fate/exr-rgb-scanline-uint32-piz-dw-large create mode 100644 tests/ref/fate/exr-rgb-tile-half-piz-dw-large create mode 100644 tests/ref/fate/exr-rgb-tile-half-zip create mode 100644 tests/ref/fate/exr-rgb-tile-half-zip-dw-outside create mode 100644 tests/ref/fate/exr-rgb-tile-uint32-piz-dw-large create mode 100644 tests/ref/fate/exr-ya-scanline-zip-half-12x8 diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 68d5befa40..d5f12cb22a 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -134,8 +134,8 @@ typedef struct EXRContext { const AVPixFmtDescriptor *desc; int w, h; -uint32_t xmax, xmin; -uint32_t ymax, ymin; +int32_t xmax, xmin; +int32_t ymax, ymin; uint32_t xdelta, ydelta; int scan_lines_per_block; @@ -995,12 
+995,13 @@ static int decode_block(AVCodecContext *avctx, void *tdata, uint64_t line_offset, uncompressed_size; uint8_t *ptr; uint32_t data_size; -uint64_t line, col = 0; +int line, col = 0; uint64_t tile_x, tile_y, tile_level_x, tile_level_y; const uint8_t *src; int step = s->desc->flags & AV_PIX_FMT_FLAG_FLOAT ? 4 : 2 * s->desc->nb_components; -int axmax = (avctx->width - (s->xmax + 1)) * step; /* nb pixel to add at the right of the datawindow */ -int bxmin = s->xmin * step; /* nb pixel to add at the left of the datawindow */ +int bxmin, axmax, window_xoffset = 0; +int window_xmin, window_xmax, window_ymin, window_ymax; +int data_xoffset, data_yoffset, data_window_offset, xsize, ysize; int i, x, buf_size = s->buf_size; int c, rgb_channel_count; float one_gamma = 1.0f / s->gamma; @@ -1029,28 +1030,16 @@ static int decode_block(AVCodecContext *avctx, void *tdata, return AVERROR_PATCHWELCOME; } -if (s->xmin || s->ymin) { -avpriv_report_missing_feature(s->avctx, "Tiles with xmin/ymin"); -return AVERROR_PATCHWELCOME; -} - -line = s->tile_attr.ySize * tile_y; +line = s->ymin + s->tile_attr.ySize * tile_y; col = s->tile_attr.xSize * tile_x; if (line < s->ymin || line > s->ymax || -col < s->xmin || col > s->xmax) +s->xmin + col < s->xmin || s->xmin + col > s->xmax) return AVERROR_INVALIDDATA; td->ysize = FFMIN(s->tile_attr.ySize, s->ydelta - tile_y * s->tile_attr.ySize); td->xsize = FFMIN(s->tile_attr.xSize, s->xdelta - tile_x * s->tile_attr.xSize); -if (col) { /* not the first tile of the line */ -bxmin = 0; /* doesn't add pixel at the left of the datawindow */ -} - -if ((col + td->xsize) != s->xdelta)/* not the last tile of the line */ -axmax = 0; /* doesn't add pixel at the right of the datawindow */ - td->channel_line_size = td->xsize * s->current_channel_offset;/* uncompress size of one line */ uncompressed_size = td->channel_line_size * (uint64_t)td->ysize;/* uncompress size of the block */ } else { @@ -1081,6 +1070,33 @@ static int decode_block(AVCodecContext 
*avctx, void *tdata, } } +window_xmin = FFMIN(avctx->width, FFMAX(0, s->xmin + col)); +window_xmax = FFMIN(avctx->width, FFMAX(0, s->xmin + col + td->xsize)); +window_ymin = FFMIN(avctx->height, FFMAX(0, line )); +window_ymax = FFMIN(avctx->height, FFMAX(0, line + td->ysize)); +xsize = window_xmax - window_xmin; +ysize = window_ymax - window_ymin; + +/* tile or scanline not visible skip decoding */ +if (xsize <= 0 || ysize <= 0) +return 0; + +/* i
[FFmpeg-devel] [PATCH] avfilter/vf_lut3d: support remapping negative values in the prelut
From: Mark Reid --- libavfilter/vf_lut3d.c | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c index 6730a424ce..988f6c8b55 100644 --- a/libavfilter/vf_lut3d.c +++ b/libavfilter/vf_lut3d.c @@ -878,18 +878,16 @@ static int parse_cinespace(AVFilterContext *ctx, FILE *f) prelut_sizes[i] = npoints; in_min[i] = FLT_MAX; -in_max[i] = FLT_MIN; +in_max[i] = -FLT_MAX; out_min[i] = FLT_MAX; -out_max[i] = FLT_MIN; - -last = FLT_MIN; +out_max[i] = -FLT_MAX; for (int j = 0; j < npoints; j++) { NEXT_FLOAT_OR_GOTO(v, end) in_min[i] = FFMIN(in_min[i], v); in_max[i] = FFMAX(in_max[i], v); in_prelut[i][j] = v; -if (v < last) { +if (j > 0 && v < last) { av_log(ctx, AV_LOG_ERROR, "Invalid file, non increasing prelut.\n"); ret = AVERROR(ENOMEM); goto end; -- 2.27.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] avfilter/vf_lut3d: prelut support for 3d cinespace luts
From: Mark Reid changes since v1: * cleaned up code style * slightly reworked apply_lut functions to feel more consistent with code --- libavfilter/vf_lut3d.c | 372 +++-- 1 file changed, 317 insertions(+), 55 deletions(-) diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c index 482e2394a7..e5d9fcc068 100644 --- a/libavfilter/vf_lut3d.c +++ b/libavfilter/vf_lut3d.c @@ -59,6 +59,15 @@ struct rgbvec { /* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT * of 512x512 (64x64x64) */ #define MAX_LEVEL 256 +#define PRELUT_SIZE 65536 + +typedef struct Lut3DPreLut { +int size; +float min[3]; +float max[3]; +float scale[3]; +float* lut[3]; +} Lut3DPreLut; typedef struct LUT3DContext { const AVClass *class; @@ -71,6 +80,7 @@ typedef struct LUT3DContext { struct rgbvec *lut; int lutsize; int lutsize2; +Lut3DPreLut prelut; #if CONFIG_HALDCLUT_FILTER uint8_t clut_rgba_map[4]; int clut_step; @@ -234,11 +244,39 @@ static inline struct rgbvec interp_tetrahedral(const LUT3DContext *lut3d, return c; } +static inline float prelut_interp_1d_linear(const Lut3DPreLut *prelut, +int idx, const float s) +{ +const int lut_max = prelut->size - 1; +const float scaled = (s - prelut->min[idx]) * prelut->scale[idx]; +const float x = av_clipf(scaled, 0.0f, lut_max); +const int prev = PREV(x); +const int next = FFMIN((int)(x) + 1, lut_max); +const float p = prelut->lut[idx][prev]; +const float n = prelut->lut[idx][next]; +const float d = x - (float)prev; +return lerpf(p, n, d); +} + +static inline struct rgbvec apply_prelut(const Lut3DPreLut *prelut, + const struct rgbvec *s) +{ +if (prelut->size <= 0) +return *s; + +struct rgbvec c; +c.r = prelut_interp_1d_linear(prelut, 0, s->r); +c.g = prelut_interp_1d_linear(prelut, 1, s->g); +c.b = prelut_interp_1d_linear(prelut, 2, s->b); +return c; +} + #define DEFINE_INTERP_FUNC_PLANAR(name, nbits, depth) \ static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \ { \ int x, y; 
\ const LUT3DContext *lut3d = ctx->priv; \ +const Lut3DPreLut *prelut = &lut3d->prelut; \ const ThreadData *td = arg; \ const AVFrame *in = td->in; \ const AVFrame *out = td->out; \ @@ -253,9 +291,11 @@ static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, i const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1]; \ const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2]; \ const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3]; \ -const float scale_r = (lut3d->scale.r / ((1scale.g / ((1 scale.b / ((1 lutsize - 1; \ +const float scale_f = 1.0f / ((1 scale.g * lut_max; \ +const float scale_b = lut3d->scale.b * lut_max; \ \ for (y = slice_start; y < slice_end; y++) { \ uint##nbits##_t *dstg = (uint##nbits##_t *)grow; \ @@ -267,9 +307,13 @@ static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, i const uint##nbits##_t *srcr = (const uint##nbits##_t *)srcrrow; \ const uint##nbits##_t *srca = (const uint##nbits##_t *)srcarow; \ for (x = 0; x < in->width; x++) { \ -const struct rgbvec scaled_rgb = {srcr[x] * scale_r,
[FFmpeg-devel] [PATCH 2/2] avfilter/vf_lut3d: prelut support for 3d cinespace luts
From: Mark Reid --- libavfilter/vf_lut3d.c | 367 +++-- 1 file changed, 312 insertions(+), 55 deletions(-) diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c index 482e2394a7..4067c4a60a 100644 --- a/libavfilter/vf_lut3d.c +++ b/libavfilter/vf_lut3d.c @@ -59,6 +59,15 @@ struct rgbvec { /* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT * of 512x512 (64x64x64) */ #define MAX_LEVEL 256 +#define PRELUT_SIZE 65536 + +typedef struct Lut3DPreLut { +int size; +float min[3]; +float max[3]; +float scale[3]; +float* lut[3]; +} Lut3DPreLut; typedef struct LUT3DContext { const AVClass *class; @@ -71,6 +80,7 @@ typedef struct LUT3DContext { struct rgbvec *lut; int lutsize; int lutsize2; +Lut3DPreLut prelut; #if CONFIG_HALDCLUT_FILTER uint8_t clut_rgba_map[4]; int clut_step; @@ -234,6 +244,31 @@ static inline struct rgbvec interp_tetrahedral(const LUT3DContext *lut3d, return c; } +static inline float apply_prelut_channel(const LUT3DContext *lut3d, float x, int c) +{ + +x = (x - lut3d->prelut.min[c]) * lut3d->prelut.scale[c]; +x = av_clipf(x, 0.0f, lut3d->prelut.size-1); + +const float a = lut3d->prelut.lut[c][PREV(x)]; +const float b = lut3d->prelut.lut[c][(FFMIN((int)(x) + 1, lut3d->prelut.size - 1))]; +const float mix = x - (float)PREV(x); + +return lerpf(a, b, mix); +} + +static inline struct rgbvec apply_prelut(const LUT3DContext *lut3d, const struct rgbvec *s) +{ +if (lut3d->prelut.size <= 0) +return *s; + +struct rgbvec c; +c.r = apply_prelut_channel(lut3d, s->r, 0); +c.g = apply_prelut_channel(lut3d, s->g, 1); +c.b = apply_prelut_channel(lut3d, s->b, 2); +return c; +} + #define DEFINE_INTERP_FUNC_PLANAR(name, nbits, depth) \ static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \ { \ @@ -253,9 +288,11 @@ static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, i const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1]; \ const uint8_t *srcrrow = 
in->data[2] + slice_start * in->linesize[2]; \ const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3]; \ -const float scale_r = (lut3d->scale.r / ((1scale.g / ((1 scale.b / ((1 lutsize - 1; \ +const float scale_f = 1.0f / ((1 scale.g * lut_max; \ +const float scale_b = lut3d->scale.b * lut_max; \ \ for (y = slice_start; y < slice_end; y++) { \ uint##nbits##_t *dstg = (uint##nbits##_t *)grow; \ @@ -267,9 +304,13 @@ static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, i const uint##nbits##_t *srcr = (const uint##nbits##_t *)srcrrow; \ const uint##nbits##_t *srca = (const uint##nbits##_t *)srcarow; \ for (x = 0; x < in->width; x++) { \ -const struct rgbvec scaled_rgb = {srcr[x] * scale_r, \ - srcg[x] * scale_g, \ - srcb[x] * scale_b}; \ +const struct rgbvec rgb = {srcr[x] * scale_f, \ + srcg[x] * scale_f, \ + srcb[x] * scale_f}; \ +const struct rgbvec prelut_rgb = apply_prelut(lut3d, &rgb); \ +const struct rgbvec scaled_rgb = {av_clipf(prelut_rgb.r * scale_r, 0, lut_max),\ + av_clipf(prelut_rgb.g * scale_g, 0, lut_max),\ + av_clipf(prelut_rgb.b *
[FFmpeg-devel] [PATCH 1/2] avfilter/vf_lut3d: initial float pixel format support
From: Mark Reid --- libavfilter/vf_lut3d.c | 207 +++-- 1 file changed, 201 insertions(+), 6 deletions(-) diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c index fda85b16b4..482e2394a7 100644 --- a/libavfilter/vf_lut3d.c +++ b/libavfilter/vf_lut3d.c @@ -24,9 +24,12 @@ * 3D Lookup table filter */ +#include "float.h" + #include "libavutil/opt.h" #include "libavutil/file.h" #include "libavutil/intreadwrite.h" +#include "libavutil/intfloat.h" #include "libavutil/avassert.h" #include "libavutil/pixdesc.h" #include "libavutil/avstring.h" @@ -73,6 +76,7 @@ typedef struct LUT3DContext { int clut_step; int clut_bits; int clut_planar; +int clut_float; int clut_width; FFFrameSync fs; #endif @@ -91,6 +95,30 @@ typedef struct ThreadData { { "tetrahedral", "interpolate values using a tetrahedron", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TETRAHEDRAL}, INT_MIN, INT_MAX, FLAGS, "interp_mode" }, \ { NULL } +#define EXPONENT_MASK 0x7F80 +#define MANTISSA_MASK 0x007F +#define SIGN_MASK 0x7FFF + +static inline float sanitizef(float f) +{ +union av_intfloat32 t; +t.f = f; + +if ((t.i & EXPONENT_MASK) == EXPONENT_MASK) { +if ((t.i & MANTISSA_MASK) != 0) { +// NAN +return 0.0f; +} else if (t.i & SIGN_MASK) { +// -INF +return FLT_MIN; +} else { +// +INF +return FLT_MAX; +} +} +return f; +} + static inline float lerpf(float v0, float v1, float f) { return v0 + (v1 - v0) * f; @@ -285,6 +313,66 @@ DEFINE_INTERP_FUNC_PLANAR(nearest, 16, 16) DEFINE_INTERP_FUNC_PLANAR(trilinear, 16, 16) DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 16) +#define DEFINE_INTERP_FUNC_PLANAR_FLOAT(name, depth) \ +static int interp_##name##_pf##depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \ +{ \ +int x, y; \ +const LUT3DContext *lut3d = ctx->priv; \ +const ThreadData *td = arg; \ +const AVFrame *in = td->in; \ +const AVFrame *out = td->out; \ +const int direct = out == in; \ +const int slice_start = (in->height * jobnr ) / nb_jobs; \ +const int slice_end = (in->height * (jobnr+1)) / nb_jobs; \ 
+uint8_t *grow = out->data[0] + slice_start * out->linesize[0]; \ +uint8_t *brow = out->data[1] + slice_start * out->linesize[1]; \ +uint8_t *rrow = out->data[2] + slice_start * out->linesize[2]; \ +uint8_t *arow = out->data[3] + slice_start * out->linesize[3]; \ +const uint8_t *srcgrow = in->data[0] + slice_start * in->linesize[0]; \ +const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1]; \ +const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2]; \ +const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3]; \ +const float lutsize = lut3d->lutsize - 1; \ +const float scale_r = lut3d->scale.r * lutsize; \ +const float scale_g = lut3d->scale.g * lutsize; \ +const float scale_b = lut3d->scale.b * lutsize; \ + \ +for (y = slice_start; y < slice_end; y++) { \ +float *dstg = (float *)grow; \ +float *dstb = (float *)brow; \ +float *dstr = (float *)rrow; \ +float *dsta = (float *)arow; \ +const float *srcg = (const float *)srcgrow;
[FFmpeg-devel] [PATCH] libswscale: fix for floating point formats, require full chroma
From: Mark Reid Upon more floating-point testing, it looks like I missed adding this bit. --- libswscale/utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libswscale/utils.c b/libswscale/utils.c index 15c0a19afa..111062e915 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -1405,6 +1405,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, srcFormat != AV_PIX_FMT_GBRP14BE && srcFormat != AV_PIX_FMT_GBRP14LE && srcFormat != AV_PIX_FMT_GBRP16BE && srcFormat != AV_PIX_FMT_GBRP16LE && srcFormat != AV_PIX_FMT_GBRAP16BE && srcFormat != AV_PIX_FMT_GBRAP16LE && +srcFormat != AV_PIX_FMT_GBRPF32BE && srcFormat != AV_PIX_FMT_GBRPF32LE && +srcFormat != AV_PIX_FMT_GBRAPF32BE && srcFormat != AV_PIX_FMT_GBRAPF32LE && ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) || (flags & SWS_FAST_BILINEAR))) c->chrSrcHSubSample = 1; -- 2.25.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] avcodec/exr: output float pixels in float pixel format
From: Mark Reid changes since v1 - default behavior, no longer hidden behind decoder parameter - updated tests to reflect change --- libavcodec/exr.c | 244 +- tests/fate/image.mak | 120 - tests/ref/fate/exr-rgb-b44a-half-negative-4x4 | 2 +- .../exr-rgb-scanline-b44-half-float-12x8-l1 | 2 +- .../exr-rgb-scanline-b44-half-float-12x8-l2 | 2 +- tests/ref/fate/exr-rgb-scanline-float-b44 | 2 +- .../ref/fate/exr-rgb-scanline-float-piz-48x32 | 2 +- tests/ref/fate/exr-rgb-scanline-half-b44-12x8 | 2 +- tests/ref/fate/exr-rgb-scanline-half-b44-13x9 | 2 +- tests/ref/fate/exr-rgb-scanline-half-piz-bw | 2 +- .../ref/fate/exr-rgb-scanline-half-piz-color | 2 +- .../ref/fate/exr-rgb-scanline-half-piz-dw-t01 | 2 +- .../ref/fate/exr-rgb-scanline-half-piz-dw-t08 | 2 +- .../fate/exr-rgb-scanline-none-negative-red | 2 +- .../fate/exr-rgb-scanline-pxr24-float-12x8| 2 +- .../fate/exr-rgb-scanline-pxr24-float-half-l1 | 2 +- .../fate/exr-rgb-scanline-pxr24-float-half-l2 | 2 +- .../fate/exr-rgb-scanline-pxr24-half-float-l1 | 2 +- .../fate/exr-rgb-scanline-pxr24-half-float-l2 | 2 +- .../exr-rgb-scanline-pxr24-half-uint32-13x9 | 2 +- .../fate/exr-rgb-scanline-raw-half-float-l1 | 2 +- .../fate/exr-rgb-scanline-raw-half-float-l2 | 2 +- .../fate/exr-rgb-scanline-rle-half-float-l1 | 2 +- .../fate/exr-rgb-scanline-rle-half-float-l2 | 2 +- .../fate/exr-rgb-scanline-zip-half-float-l1 | 2 +- .../fate/exr-rgb-scanline-zip-half-float-l2 | 2 +- .../fate/exr-rgb-scanline-zip1-half-float-l1 | 2 +- ...b-scanline-zip1-half-float-l1-zero-offsets | 2 +- .../fate/exr-rgb-scanline-zip1-half-float-l2 | 2 +- tests/ref/fate/exr-rgb-tile-float-raw-12x8| 2 +- tests/ref/fate/exr-rgb-tile-float-raw-150x130 | 2 +- .../fate/exr-rgb-tile-half-float-b44-12x8-l1 | 2 +- .../fate/exr-rgb-tile-half-float-b44-12x8-l2 | 2 +- tests/ref/fate/exr-rgb-tile-half-raw-12x8 | 2 +- .../ref/fate/exr-rgb-tile-pxr24-float-half-l1 | 2 +- .../ref/fate/exr-rgb-tile-pxr24-float-half-l2 | 2 +- .../ref/fate/exr-rgb-tile-pxr24-half-float-l1 | 2 +- 
.../ref/fate/exr-rgb-tile-pxr24-half-float-l2 | 2 +- tests/ref/fate/exr-rgb-tile-raw-half-float-l1 | 2 +- tests/ref/fate/exr-rgb-tile-raw-half-float-l2 | 2 +- tests/ref/fate/exr-rgb-tile-rle-half-float-l1 | 2 +- tests/ref/fate/exr-rgb-tile-rle-half-float-l2 | 2 +- tests/ref/fate/exr-rgb-tile-zip-half-float-l1 | 2 +- tests/ref/fate/exr-rgb-tile-zip-half-float-l2 | 2 +- .../ref/fate/exr-rgb-tile-zip1-half-float-l1 | 2 +- .../ref/fate/exr-rgb-tile-zip1-half-float-l2 | 2 +- .../ref/fate/exr-rgba-multiscanline-half-b44 | 2 +- .../exr-rgba-scanline-float-half-b44-12x8-l1 | 2 +- .../exr-rgba-scanline-float-half-b44-12x8-l2 | 2 +- .../exr-rgba-scanline-float-half-b44-13x9-l1 | 2 +- .../exr-rgba-scanline-float-half-b44-13x9-l2 | 2 +- .../exr-rgba-scanline-float-half-b44a-12x8-l1 | 2 +- .../exr-rgba-scanline-float-half-b44a-12x8-l2 | 2 +- .../exr-rgba-scanline-float-half-b44a-13x9-l1 | 2 +- .../exr-rgba-scanline-float-half-b44a-13x9-l2 | 2 +- tests/ref/fate/exr-rgba-zip16-16x32-flag4 | 2 +- tests/ref/fate/exr-slice-pxr24| 2 +- tests/ref/fate/exr-slice-raw | 2 +- tests/ref/fate/exr-slice-rle | 2 +- tests/ref/fate/exr-slice-zip1 | 2 +- tests/ref/fate/exr-slice-zip16| 2 +- tests/ref/fate/exr-y-scanline-zip-half-12x8 | 2 +- tests/ref/fate/exr-y-tile-zip-half-12x8 | 2 +- 63 files changed, 246 insertions(+), 240 deletions(-) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 73419eadb1..68d5befa40 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -30,7 +30,6 @@ * For more information on the OpenEXR format, visit: * http://openexr.com/ * - * exr_flt2uint() and exr_halflt2uint() is credited to Reimar Döffinger. * exr_half2float() is credited to Aaftab Munshi, Dan Ginsburg, Dave Shreiner. 
*/ @@ -160,7 +159,7 @@ typedef struct EXRContext { enum AVColorTransferCharacteristic apply_trc_type; float gamma; -uint16_t gamma_table[65536]; +union av_intfloat32 gamma_table[65536]; } EXRContext; /* -15 stored using a single precision bias of 127 */ @@ -225,47 +224,6 @@ static union av_intfloat32 exr_half2float(uint16_t hf) return f; } - -/** - * Convert from 32-bit float as uint32_t to uint16_t. - * - * @param v 32-bit float - * - * @return normalized 16-bit unsigned int - */ -static inline uint16_t exr_flt2uint(int32_t v) -{ -int32_t exp = v >> 23; -// "HACK": negative values result in exp< 0, so clipping them to 0 -// is also handled by this condition, avoids explicit check for sign bit. -if (exp <= 127 + 7 - 24) // we would shift out all bits anyway -return 0; -if (exp >= 127) -ret
[FFmpeg-devel] [PATCH v2 2/2] libswscale: add output support for AV_PIX_FMT_GBRAPF32
From: Mark Reid --- libswscale/output.c | 82 libswscale/slice.c | 28 libswscale/swscale.c | 5 ++ libswscale/swscale_internal.h| 36 +++ libswscale/swscale_unscaled.c| 33 ++ libswscale/utils.c | 8 +-- tests/ref/fate/filter-pixdesc-gbrapf32be | 1 + tests/ref/fate/filter-pixdesc-gbrapf32le | 1 + tests/ref/fate/filter-pixdesc-gbrpf32be | 1 + tests/ref/fate/filter-pixdesc-gbrpf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ 20 files changed, 221 insertions(+), 15 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32be create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32le create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32be create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32le diff --git a/libswscale/output.c b/libswscale/output.c index 68f43ffba3..e864e515d0 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2312,6 +2312,82 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, +const int16_t **lumSrcx, int lumFilterSize, +const int16_t *chrFilter, const int16_t **chrUSrcx, +const int16_t **chrVSrcx, int chrFilterSize, +const int16_t **alpSrcx, uint8_t **dest, +int dstW, int y) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); +int i; +int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx; +uint32_t **dest32 = (uint32_t**)dest; +const int32_t **lumSrc = (const int32_t**)lumSrcx; +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; +const int32_t **alpSrc = (const 
int32_t**)alpSrcx; +static const float float_mult = 1.0f / 65535.0f; + +for (i = 0; i < dstW; i++) { +int j; +int Y = -0x4000; +int U = -(128 << 23); +int V = -(128 << 23); +int R, G, B, A; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; +} + +Y >>= 14; +Y += 0x1; +U >>= 14; +V >>= 14; + +if (hasAlpha) { +A = -0x4000; + +for (j = 0; j < lumFilterSize; j++) +A += alpSrc[j][i] * (unsigned)lumFilter[j]; + +A >>= 1; +A += 0x20002000; +} + +Y -= c->yuv2rgb_y_offset; +Y *= c->yuv2rgb_y_coeff; +Y += 1 << 13; +R = V * c->yuv2rgb_v2r_coeff; +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; +B =U * c->yuv2rgb_u2b_coeff; + +R = av_clip_uintp2(Y + R, 30); +G = av_clip_uintp2(Y + G, 30); +B = av_clip_uintp2(Y + B, 30); + +dest32[0][i] = av_float2int(float_mult * (float)(G >> 14)); +dest32[1][i] = av_float2int(float_mult * (float)(B >> 14)); +dest32[2][i] = av_float2int(float_mult * (float)(R >> 14)); +if (hasAlpha) +dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); +} +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { +for (i = 0; i < dstW; i++) { +dest32[0][i] = av_bswap32(dest32[0][i]); +dest32[1][i] = av_bswap32(dest32[1][i]); +dest32[2][i] = av_bswap32(dest32[2][i]); +if (hasAlpha) +dest32[3][i] = av_bswap32(dest32[3][i]); +} +} +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2716,6 +2792,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_GBRAP16LE: *yuv2anyX = yuv2gbrp16_full_X_c; break; +case AV_PIX_FMT_GBRPF32BE: +case AV_PIX_FMT_GBRPF32LE: +case AV_PIX_FMT_GBRAPF32BE: +case AV_PIX_FMT_GBRAPF32LE: +*yuv2anyX = yuv2gbrpf32_full_X_c; +break; } if (!*yuv2packedX && !*yuv2anyX) goto YUV_PACKED; diff --git a/libswscale/slice.c b/libswscale/slice.c index db4fa874ff..7849b70f4d 100644
[FFmpeg-devel] [PATCH v2 1/2] libswscale: add input support AV_PIX_FMT_GBRAPF32
From: Mark Reid --- libswscale/input.c | 91 ++ libswscale/utils.c | 4 ++ 2 files changed, 95 insertions(+) diff --git a/libswscale/input.c b/libswscale/input.c index 099661cb6d..e74cf04133 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -960,6 +960,59 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, } #undef rdpx +#define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): av_int2float(AV_RL32(src))) + +static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) +{ +int i; +const float **src = (const float **)_src; +uint16_t *dst= (uint16_t *)_dst; + +for (i = 0; i < width; i++) { +dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src[3] + i))); +} +} + +static av_always_inline void planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) +{ +int i; +const float **src = (const float **)_src; +uint16_t *dstU = (uint16_t *)_dstU; +uint16_t *dstV = (uint16_t *)_dstV; +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int bpc = 16; +int shift = 14; +for (i = 0; i < width; i++) { +int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i))); +int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i))); +int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i))); + +dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); +dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); +} +} + +static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) +{ +int i; +const float **src = (const float **)_src; +uint16_t *dst= (uint16_t *)_dst; + +int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +int bpc = 16; +int shift = 14; +for (i = 0; i < 
width; i++) { +int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i))); +int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i))); +int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i))); + +dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14)); +} +} + +#undef rdpx + static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { @@ -1022,6 +1075,26 @@ rgb9plus_planar_transparency_funcs(10) rgb9plus_planar_transparency_funcs(12) rgb9plus_planar_transparency_funcs(16) +#define rgbf32_planar_funcs_endian(endian_name, endian) \ +static void planar_rgbf32##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ + int w, int32_t *rgb2yuv) \ +{ \ +planar_rgbf32_to_y(dst, src, w, endian, rgb2yuv); \ +} \ +static void planar_rgbf32##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src[4], int w, int32_t *rgb2yuv) \ +{ \ +planar_rgbf32_to_uv(dstU, dstV, src, w, endian, rgb2yuv); \ +} \ +static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4], \ + int w, int32_t *rgb2yuv) \ +{ \ +planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv); \ +} + +rgbf32_planar_funcs_endian(le, 0) +rgbf32_planar_funcs_endian(be, 1) + av_cold void ff_sws_init_input_funcs(SwsContext *c) { enum AVPixelFormat srcFormat = c->srcFormat; @@ -1070,6 +1143,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break; +case AV_PIX_FMT_GBRAPF32LE: +case AV_PIX_FMT_GBRPF32LE: +c->readChrPlanar = planar_rgbf32le_to_uv; +break; case AV_PIX_F
[FFmpeg-devel] [PATCH v2 0/2] initial input/output support for AV_PIX_FMT_GBRAPF32
From: Mark Reid changes since v1 - added missing fillPlane32 function - tests should pass now for qemu-mips - removed exr patch for now Mark Reid (2): libswscale: add input support AV_PIX_FMT_GBRAPF32 libswscale: add output support for AV_PIX_FMT_GBRAPF32 libswscale/input.c | 91 libswscale/output.c | 82 + libswscale/slice.c | 28 +--- libswscale/swscale.c | 5 ++ libswscale/swscale_internal.h| 36 ++ libswscale/swscale_unscaled.c| 33 + libswscale/utils.c | 4 ++ tests/ref/fate/filter-pixdesc-gbrapf32be | 1 + tests/ref/fate/filter-pixdesc-gbrapf32le | 1 + tests/ref/fate/filter-pixdesc-gbrpf32be | 1 + tests/ref/fate/filter-pixdesc-gbrpf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ 21 files changed, 312 insertions(+), 11 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32be create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32le create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32be create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32le -- 2.25.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 3/3] avcodec/exr: add option to output pixels in float
From: Mark Reid --- libavcodec/exr.c | 103 +++ 1 file changed, 86 insertions(+), 17 deletions(-) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 73419eadb1..f86e97a433 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -161,6 +161,7 @@ typedef struct EXRContext { enum AVColorTransferCharacteristic apply_trc_type; float gamma; uint16_t gamma_table[65536]; +int output_float; } EXRContext; /* -15 stored using a single precision bias of 127 */ @@ -1035,14 +1036,14 @@ static int decode_block(AVCodecContext *avctx, void *tdata, const uint8_t *channel_buffer[4] = { 0 }; const uint8_t *buf = s->buf; uint64_t line_offset, uncompressed_size; -uint16_t *ptr_x; uint8_t *ptr; uint32_t data_size; uint64_t line, col = 0; uint64_t tile_x, tile_y, tile_level_x, tile_level_y; const uint8_t *src; -int axmax = (avctx->width - (s->xmax + 1)) * 2 * s->desc->nb_components; /* nb pixel to add at the right of the datawindow */ -int bxmin = s->xmin * 2 * s->desc->nb_components; /* nb pixel to add at the left of the datawindow */ +int step = s->desc->flags & AV_PIX_FMT_FLAG_FLOAT ? 4 : 2 * s->desc->nb_components; +int axmax = (avctx->width - (s->xmax + 1)) * step; /* nb pixel to add at the right of the datawindow */ +int bxmin = s->xmin * step; /* nb pixel to add at the left of the datawindow */ int i, x, buf_size = s->buf_size; int c, rgb_channel_count; float one_gamma = 1.0f / s->gamma; @@ -1175,6 +1176,58 @@ static int decode_block(AVCodecContext *avctx, void *tdata, if (s->channel_offsets[3] >= 0) channel_buffer[3] = src + td->xsize * s->channel_offsets[3]; +if (s->desc->flags & AV_PIX_FMT_FLAG_FLOAT) { + +/* todo: change this when a floating point pixel format with luma with alpha is implemented */ +int channel_count = s->channel_offsets[3] >= 0 ? 
4 : rgb_channel_count; +if (s->is_luma) { +channel_buffer[1] = channel_buffer[0]; +channel_buffer[2] = channel_buffer[0]; +} + +for (c = 0; c < channel_count; c++) { +int plane = s->desc->comp[c].plane; +ptr = p->data[plane] + line * p->linesize[plane] + (col * 4); + +for (i = 0; i < td->ysize; i++, ptr += p->linesize[plane]) { +const uint8_t *src; +union av_intfloat32 *ptr_x; + +src = channel_buffer[c]; +ptr_x = (union av_intfloat32 *)ptr; + +// Zero out the start if xmin is not 0 +memset(ptr_x, 0, bxmin); +ptr_x += s->xmin; + +if (s->pixel_type == EXR_FLOAT) { +// 32-bit +for (x = 0; x < td->xsize; x++) { +ptr_x->i = bytestream_get_le32(&src); +ptr_x++; +} +} else if (s->pixel_type == EXR_HALF) { +// 16-bit +for (x = 0; x < td->xsize; x++) { +*ptr_x++ = exr_half2float(bytestream_get_le16(&src)); +} +} else if (s->pixel_type == EXR_UINT) { +const float float_mult = 1.0f / (float)UINT32_MAX; +for (x = 0; x < td->xsize; x++) { +ptr_x->f = float_mult * (float)bytestream_get_le32(&src); +ptr_x++; +} +} + +// Zero out the end if xmax+1 is not w +memset(ptr_x, 0, axmax); +channel_buffer[c] += td->channel_line_size; +} +} + +return 0; +} + ptr = p->data[0] + line * p->linesize[0] + (col * s->desc->nb_components * 2); for (i = 0; @@ -1182,6 +1235,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata, const uint8_t * a; const uint8_t *rgb[3]; +uint16_t *ptr_x; for (c = 0; c < rgb_channel_count; c++) { rgb[c] = channel_buffer[c]; @@ -1676,7 +1730,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, AVFrame *picture = data; uint8_t *ptr; -int y, ret; +int i, y, ret; +int planes; int out_line_size; int nb_blocks; /* nb scanline or nb tile */ uint64_t start_offset_table; @@ -1694,15 +1749,16 @@ static int decode_frame(AVCodecContext *avctx, void *data, case EXR_UINT: if (s->channel_offsets[3] >= 0) { if (!s->is_luma) { -avctx->pix_fmt = AV_PIX_FMT_RGBA64; +avctx->pix_fmt = s->output_float ? 
AV_PIX_FMT_GBRAPF32 : AV_PIX_FMT_RGBA64; } else { -avctx->pix_fmt = AV_PIX_FMT_YA16; +/* todo: change this when a floating point pixel format with luma with alpha is implemented */ +avctx->pix_fmt = s->output_float ? AV_PIX_FMT_GBRA
[FFmpeg-devel] [PATCH 1/3] libswscale: add input support AV_PIX_FMT_GBRAPF32
From: Mark Reid --- libswscale/input.c | 91 ++ libswscale/utils.c | 4 ++ 2 files changed, 95 insertions(+) diff --git a/libswscale/input.c b/libswscale/input.c index 099661cb6d..e74cf04133 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -960,6 +960,59 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, } #undef rdpx +#define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): av_int2float(AV_RL32(src))) + +static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) +{ +int i; +const float **src = (const float **)_src; +uint16_t *dst= (uint16_t *)_dst; + +for (i = 0; i < width; i++) { +dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src[3] + i))); +} +} + +static av_always_inline void planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) +{ +int i; +const float **src = (const float **)_src; +uint16_t *dstU = (uint16_t *)_dstU; +uint16_t *dstV = (uint16_t *)_dstV; +int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +int bpc = 16; +int shift = 14; +for (i = 0; i < width; i++) { +int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i))); +int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i))); +int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i))); + +dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); +dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); +} +} + +static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv) +{ +int i; +const float **src = (const float **)_src; +uint16_t *dst= (uint16_t *)_dst; + +int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +int bpc = 16; +int shift = 14; +for (i = 0; i < 
width; i++) { +int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i))); +int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i))); +int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i))); + +dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14)); +} +} + +#undef rdpx + static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { @@ -1022,6 +1075,26 @@ rgb9plus_planar_transparency_funcs(10) rgb9plus_planar_transparency_funcs(12) rgb9plus_planar_transparency_funcs(16) +#define rgbf32_planar_funcs_endian(endian_name, endian) \ +static void planar_rgbf32##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ + int w, int32_t *rgb2yuv) \ +{ \ +planar_rgbf32_to_y(dst, src, w, endian, rgb2yuv); \ +} \ +static void planar_rgbf32##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src[4], int w, int32_t *rgb2yuv) \ +{ \ +planar_rgbf32_to_uv(dstU, dstV, src, w, endian, rgb2yuv); \ +} \ +static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4], \ + int w, int32_t *rgb2yuv) \ +{ \ +planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv); \ +} + +rgbf32_planar_funcs_endian(le, 0) +rgbf32_planar_funcs_endian(be, 1) + av_cold void ff_sws_init_input_funcs(SwsContext *c) { enum AVPixelFormat srcFormat = c->srcFormat; @@ -1070,6 +1143,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break; +case AV_PIX_FMT_GBRAPF32LE: +case AV_PIX_FMT_GBRPF32LE: +c->readChrPlanar = planar_rgbf32le_to_uv; +break; case AV_PIX_F
[FFmpeg-devel] [PATCH 2/3] libswscale: add output support for AV_PIX_FMT_GBRAPF32
From: Mark Reid --- libswscale/output.c | 82 libswscale/slice.c | 28 libswscale/swscale_unscaled.c| 33 ++ libswscale/utils.c | 8 +-- tests/ref/fate/filter-pixdesc-gbrapf32be | 1 + tests/ref/fate/filter-pixdesc-gbrapf32le | 1 + tests/ref/fate/filter-pixdesc-gbrpf32be | 1 + tests/ref/fate/filter-pixdesc-gbrpf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 ++ tests/ref/fate/filter-pixfmts-crop | 4 ++ tests/ref/fate/filter-pixfmts-field | 4 ++ tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ tests/ref/fate/filter-pixfmts-hflip | 4 ++ tests/ref/fate/filter-pixfmts-il | 4 ++ tests/ref/fate/filter-pixfmts-null | 4 ++ tests/ref/fate/filter-pixfmts-scale | 4 ++ tests/ref/fate/filter-pixfmts-transpose | 4 ++ tests/ref/fate/filter-pixfmts-vflip | 4 ++ 18 files changed, 180 insertions(+), 15 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32be create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32le create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32be create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32le diff --git a/libswscale/output.c b/libswscale/output.c index 68f43ffba3..e864e515d0 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2312,6 +2312,82 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, +const int16_t **lumSrcx, int lumFilterSize, +const int16_t *chrFilter, const int16_t **chrUSrcx, +const int16_t **chrVSrcx, int chrFilterSize, +const int16_t **alpSrcx, uint8_t **dest, +int dstW, int y) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); +int i; +int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx; +uint32_t **dest32 = (uint32_t**)dest; +const int32_t **lumSrc = (const int32_t**)lumSrcx; +const int32_t **chrUSrc = (const int32_t**)chrUSrcx; +const int32_t **chrVSrc = (const int32_t**)chrVSrcx; +const int32_t **alpSrc = (const int32_t**)alpSrcx; +static const float float_mult = 1.0f / 
65535.0f; + +for (i = 0; i < dstW; i++) { +int j; +int Y = -0x4000; +int U = -(128 << 23); +int V = -(128 << 23); +int R, G, B, A; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * (unsigned)chrFilter[j]; +V += chrVSrc[j][i] * (unsigned)chrFilter[j]; +} + +Y >>= 14; +Y += 0x1; +U >>= 14; +V >>= 14; + +if (hasAlpha) { +A = -0x4000; + +for (j = 0; j < lumFilterSize; j++) +A += alpSrc[j][i] * (unsigned)lumFilter[j]; + +A >>= 1; +A += 0x20002000; +} + +Y -= c->yuv2rgb_y_offset; +Y *= c->yuv2rgb_y_coeff; +Y += 1 << 13; +R = V * c->yuv2rgb_v2r_coeff; +G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; +B =U * c->yuv2rgb_u2b_coeff; + +R = av_clip_uintp2(Y + R, 30); +G = av_clip_uintp2(Y + G, 30); +B = av_clip_uintp2(Y + B, 30); + +dest32[0][i] = av_float2int(float_mult * (float)(G >> 14)); +dest32[1][i] = av_float2int(float_mult * (float)(B >> 14)); +dest32[2][i] = av_float2int(float_mult * (float)(R >> 14)); +if (hasAlpha) +dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); +} +if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { +for (i = 0; i < dstW; i++) { +dest32[0][i] = av_bswap32(dest32[0][i]); +dest32[1][i] = av_bswap32(dest32[1][i]); +dest32[2][i] = av_bswap32(dest32[2][i]); +if (hasAlpha) +dest32[3][i] = av_bswap32(dest32[3][i]); +} +} +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2716,6 +2792,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_GBRAP16LE: *yuv2anyX = yuv2gbrp16_full_X_c; break; +case AV_PIX_FMT_GBRPF32BE: +case AV_PIX_FMT_GBRPF32LE: +case AV_PIX_FMT_GBRAPF32BE: +case AV_PIX_FMT_GBRAPF32LE: +*yuv2anyX = yuv2gbrpf32_full_X_c; +break; } if (!*yuv2packedX && !*yuv2anyX) goto YUV_PACKED; diff --git a/libswscale/slice.c b/libswscale/slice.c index db4fa874ff..83b2bba443 100644 --- a/libswscale/slice.c +++ b/libswscale/slice.c @@ 
-189,23 +189,26 @@ int ff_init_slice_from_src(SwsSlice
[FFmpeg-devel] [PATCH 0/3] libswscale: initial input/output support for AV_PIX_FMT_GBRAPF32
From: Mark Reid Hi, The following patches add initial input/output support for planar RGB floating-point pixel formats in libswscale and add a decoder option to exr to output as float. Mark Reid (3): libswscale: add input support AV_PIX_FMT_GBRAPF32 libswscale: add output support for AV_PIX_FMT_GBRAPF32 avcodec/exr: add option to output pixels in float libavcodec/exr.c | 103 +++ libswscale/input.c | 91 libswscale/output.c | 82 ++ libswscale/slice.c | 28 +++--- libswscale/swscale_unscaled.c| 33 libswscale/utils.c | 4 + tests/ref/fate/filter-pixdesc-gbrapf32be | 1 + tests/ref/fate/filter-pixdesc-gbrapf32le | 1 + tests/ref/fate/filter-pixdesc-gbrpf32be | 1 + tests/ref/fate/filter-pixdesc-gbrpf32le | 1 + tests/ref/fate/filter-pixfmts-copy | 4 + tests/ref/fate/filter-pixfmts-crop | 4 + tests/ref/fate/filter-pixfmts-field | 4 + tests/ref/fate/filter-pixfmts-fieldorder | 4 + tests/ref/fate/filter-pixfmts-hflip | 4 + tests/ref/fate/filter-pixfmts-il | 4 + tests/ref/fate/filter-pixfmts-null | 4 + tests/ref/fate/filter-pixfmts-scale | 4 + tests/ref/fate/filter-pixfmts-transpose | 4 + tests/ref/fate/filter-pixfmts-vflip | 4 + 20 files changed, 357 insertions(+), 28 deletions(-) create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32be create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32le create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32be create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32le -- 2.25.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] fate/exr: add test for YA16
From: Mark Reid Hi, I noticed there was no FATE test for this supported format of exr. Here is the test file to add to FATE: https://www.dropbox.com/s/urzus0svmq1oizd/ya_scanline_zip_half_12x8.exr?dl=0 --- tests/fate/image.mak | 3 +++ tests/ref/fate/exr-ya-scanline-zip-half-12x8 | 6 ++ 2 files changed, 9 insertions(+) create mode 100644 tests/ref/fate/exr-ya-scanline-zip-half-12x8 diff --git a/tests/fate/image.mak b/tests/fate/image.mak index 121405aab9..f65119bffc 100644 --- a/tests/fate/image.mak +++ b/tests/fate/image.mak @@ -281,6 +281,9 @@ fate-exr-y-tile-zip-half-12x8: CMD = framecrc -i $(TARGET_SAMPLES)/exr/y_tile_zi FATE_EXR += fate-exr-y-scanline-zip-half-12x8 fate-exr-y-scanline-zip-half-12x8: CMD = framecrc -i $(TARGET_SAMPLES)/exr/y_scanline_zip_half_12x8.exr -pix_fmt gray16le +FATE_EXR += fate-exr-ya-scanline-zip-half-12x8 +fate-exr-ya-scanline-zip-half-12x8: CMD = framecrc -i $(TARGET_SAMPLES)/exr/ya_scanline_zip_half_12x8.exr -pix_fmt ya16le + FATE_EXR += fate-exr-rgb-scanline-half-piz-dw-t08 fate-exr-rgb-scanline-half-piz-dw-t08: CMD = framecrc -i $(TARGET_SAMPLES)/exr/rgb_scanline_half_piz_dw_t08.exr -pix_fmt rgb48le diff --git a/tests/ref/fate/exr-ya-scanline-zip-half-12x8 b/tests/ref/fate/exr-ya-scanline-zip-half-12x8 new file mode 100644 index 00..f166396164 --- /dev/null +++ b/tests/ref/fate/exr-ya-scanline-zip-half-12x8 @@ -0,0 +1,6 @@ +#tb 0: 1/25 +#media_type 0: video +#codec_id 0: rawvideo +#dimensions 0: 12x8 +#sar 0: 1/1 +0, 0, 0,1, 384, 0xcb9148cc -- 2.25.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] avcodec/exr: add cineon lin2log trc
From: Mark Reid Hi, The following patch adds a cineon lin2log color transfer characteristic to exr. The purpose of this patch is to allow preserving of the dynamic range of an exr file when converting to DPX or when using video filter such as 3d luts. I wasn't sure if adding it to the AVColorTransferCharacteristic enum was the correct approach, as this might be a exr specific thing but I figured it was a good starting point. changes since v1: - updated fate test --- libavcodec/exr.c | 2 ++ libavutil/color_utils.c| 14 ++ libavutil/pixfmt.h | 1 + tests/ref/fate/color_utils | 19 +++ 4 files changed, 36 insertions(+) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 1db30a1ae0..f2900a7921 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -1938,6 +1938,8 @@ static const AVOption options[] = { AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST2084 }, INT_MIN, INT_MAX, VD, "apply_trc_type"}, { "smpte428_1", "SMPTE ST 428-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST428_1 }, INT_MIN, INT_MAX, VD, "apply_trc_type"}, +{ "lin2log", "Default Cineon/DPX log", 0, +AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_CINE_LIN2LOG }, INT_MIN, INT_MAX, VD, "apply_trc_type"}, { NULL }, }; diff --git a/libavutil/color_utils.c b/libavutil/color_utils.c index eb8bc7b5fc..e33c019d4a 100644 --- a/libavutil/color_utils.c +++ b/libavutil/color_utils.c @@ -167,6 +167,16 @@ static double avpriv_trc_arib_std_b67(double Lc) { (Lc <= 1.0 / 12.0 ? 
sqrt(3.0 * Lc) : a * log(12.0 * Lc - b) + c); } +static double avpriv_trc_cine_lin2log(double Lc) { +const double blackpoint = 95.0; +const double whitepoint = 685.0; +const double gamma = 0.6; +const double offset = pow(10, (blackpoint - whitepoint) * 0.002 / gamma); +const double gain = 1.0 / (1.0 - offset); + +return (log10((Lc + offset) / gain) / (0.002 / gamma) + whitepoint ) / 1023.0; +} + avpriv_trc_function avpriv_get_trc_function_from_trc(enum AVColorTransferCharacteristic trc) { avpriv_trc_function func = NULL; @@ -225,6 +235,10 @@ avpriv_trc_function avpriv_get_trc_function_from_trc(enum AVColorTransferCharact func = avpriv_trc_arib_std_b67; break; +case AVCOL_TRC_CINE_LIN2LOG: +func = avpriv_trc_cine_lin2log; +break; + case AVCOL_TRC_RESERVED0: case AVCOL_TRC_UNSPECIFIED: case AVCOL_TRC_RESERVED: diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 1c625cfc8a..1f3f9988d7 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -499,6 +499,7 @@ enum AVColorTransferCharacteristic { AVCOL_TRC_SMPTE428 = 17, ///< SMPTE ST 428-1 AVCOL_TRC_SMPTEST428_1 = AVCOL_TRC_SMPTE428, AVCOL_TRC_ARIB_STD_B67 = 18, ///< ARIB STD-B67, known as "Hybrid log-gamma" +AVCOL_TRC_CINE_LIN2LOG = 19, ///< Default Cineon/DPX linear to log 1D curve AVCOL_TRC_NB ///< Not part of ABI }; diff --git a/tests/ref/fate/color_utils b/tests/ref/fate/color_utils index 10f8055916..41221c131a 100644 --- a/tests/ref/fate/color_utils +++ b/tests/ref/fate/color_utils @@ -302,3 +302,22 @@ AVColorTransferCharacteristic=18 calling func(15123.456700) expected=2.725380 AVColorTransferCharacteristic=18 calling func(19845.889230) expected=2.773978 AVColorTransferCharacteristic=18 calling func(98678.423100) expected=3.060803 AVColorTransferCharacteristic=18 calling func(9.88) expected=3.063182 +AVColorTransferCharacteristic=19 calling func(-0.10) expected=nan +AVColorTransferCharacteristic=19 calling func(-0.018054) expected=nan +AVColorTransferCharacteristic=19 calling func(-0.01) 
expected=-0.240327 +AVColorTransferCharacteristic=19 calling func(-0.004490) expected=0.023018 +AVColorTransferCharacteristic=19 calling func(0.00) expected=0.091481 +AVColorTransferCharacteristic=19 calling func(0.003162) expected=0.124195 +AVColorTransferCharacteristic=19 calling func(0.005000) expected=0.139945 +AVColorTransferCharacteristic=19 calling func(0.009000) expected=0.168690 +AVColorTransferCharacteristic=19 calling func(0.015000) expected=0.202405 +AVColorTransferCharacteristic=19 calling func(0.10) expected=0.388020 +AVColorTransferCharacteristic=19 calling func(1.00) expected=0.669584 +AVColorTransferCharacteristic=19 calling func(52.37) expected=1.172373 +AVColorTransferCharacteristic=19 calling func(125.098765) expected=1.283258 +AVColorTransferCharacteristic=19 calling func(1999.111230) expected=1.636205 +AVColorTransferCharacteristic=19 calling func(6945.443000) expected=1.794815 +AVColorTransferCharacteristic=19 calling func(15123.456700) expected=1.893921 +AVColorTransferCharacteristic=19 calling func(19845.889230) expected=1.928531 +AVColorTransferCharacteristic=19 calling func(98678.423100) expected=2.132798 +AVColorTransferCharacteristic=19 calling func(9.88) expected=2.134492 -- 2.2
[FFmpeg-devel] [PATCH] avcodec/exr: add cineon lin2log trc
From: Mark Reid Hi, The following patch adds a cineon lin2log color transfer characteristic to exr. The purpose of this patch is to allow preserving of the dynamic range of an exr file when converting to DPX or when using video filter such as 3d luts. I wasn't sure if adding it to the AVColorTransferCharacteristic enum was the correct approach as this might be a exr specific thing but I figured it was a good starting point. --- libavcodec/exr.c| 2 ++ libavutil/color_utils.c | 14 ++ libavutil/pixfmt.h | 1 + 3 files changed, 17 insertions(+) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 1db30a1ae0..f2900a7921 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -1938,6 +1938,8 @@ static const AVOption options[] = { AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST2084 }, INT_MIN, INT_MAX, VD, "apply_trc_type"}, { "smpte428_1", "SMPTE ST 428-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST428_1 }, INT_MIN, INT_MAX, VD, "apply_trc_type"}, +{ "lin2log", "Default Cineon/DPX log", 0, +AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_CINE_LIN2LOG }, INT_MIN, INT_MAX, VD, "apply_trc_type"}, { NULL }, }; diff --git a/libavutil/color_utils.c b/libavutil/color_utils.c index eb8bc7b5fc..e33c019d4a 100644 --- a/libavutil/color_utils.c +++ b/libavutil/color_utils.c @@ -167,6 +167,16 @@ static double avpriv_trc_arib_std_b67(double Lc) { (Lc <= 1.0 / 12.0 ? 
sqrt(3.0 * Lc) : a * log(12.0 * Lc - b) + c); } +static double avpriv_trc_cine_lin2log(double Lc) { +const double blackpoint = 95.0; +const double whitepoint = 685.0; +const double gamma = 0.6; +const double offset = pow(10, (blackpoint - whitepoint) * 0.002 / gamma); +const double gain = 1.0 / (1.0 - offset); + +return (log10((Lc + offset) / gain) / (0.002 / gamma) + whitepoint ) / 1023.0; +} + avpriv_trc_function avpriv_get_trc_function_from_trc(enum AVColorTransferCharacteristic trc) { avpriv_trc_function func = NULL; @@ -225,6 +235,10 @@ avpriv_trc_function avpriv_get_trc_function_from_trc(enum AVColorTransferCharact func = avpriv_trc_arib_std_b67; break; +case AVCOL_TRC_CINE_LIN2LOG: +func = avpriv_trc_cine_lin2log; +break; + case AVCOL_TRC_RESERVED0: case AVCOL_TRC_UNSPECIFIED: case AVCOL_TRC_RESERVED: diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 1c625cfc8a..1f3f9988d7 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -499,6 +499,7 @@ enum AVColorTransferCharacteristic { AVCOL_TRC_SMPTE428 = 17, ///< SMPTE ST 428-1 AVCOL_TRC_SMPTEST428_1 = AVCOL_TRC_SMPTE428, AVCOL_TRC_ARIB_STD_B67 = 18, ///< ARIB STD-B67, known as "Hybrid log-gamma" +AVCOL_TRC_CINE_LIN2LOG = 19, ///< Default Cineon/DPX linear to log 1D curve AVCOL_TRC_NB ///< Not part of ABI }; -- 2.21.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 2/2] fate/mxf: add mxf user comments tests
From: Mark Reid --- tests/fate/mxf.mak | 15 ++- tests/ref/fate/mxf-d10-user-comments| 1 + tests/ref/fate/mxf-opatom-user-comments | 1 + tests/ref/fate/mxf-user-comments| 1 + 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/ref/fate/mxf-d10-user-comments create mode 100644 tests/ref/fate/mxf-opatom-user-comments create mode 100644 tests/ref/fate/mxf-user-comments diff --git a/tests/fate/mxf.mak b/tests/fate/mxf.mak index dce23d522e..62e4ec01cb 100644 --- a/tests/fate/mxf.mak +++ b/tests/fate/mxf.mak @@ -37,9 +37,22 @@ FATE_MXF_REEL_NAME-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-reel_ fate-mxf-reel_name: $(TARGET_SAMPLES)/mxf/Sony-1.mxf fate-mxf-reel_name: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-1.mxf -c copy -timecode 00:00:00:00 -metadata "reel_name=test_reel" -fflags +bitexact -f mxf +FATE_MXF_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-user-comments +fate-mxf-user-comments: $(TARGET_SAMPLES)/mxf/Sony-1.mxf +fate-mxf-user-comments: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-1.mxf -c copy -metadata "comment_test=value" -fflags +bitexact -f mxf + +FATE_MXF_D10_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-d10-user-comments +fate-mxf-d10-user-comments: $(TARGET_SAMPLES)/mxf/Sony-1.mxf +fate-mxf-d10-user-comments: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-1.mxf -c copy -metadata "comment_test=value" -store_user_comments 1 -fflags +bitexact -f mxf_d10 + +FATE_MXF_OPATOM_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-opatom-user-comments +fate-mxf-opatom-user-comments: $(TARGET_SAMPLES)/mxf/Sony-1.mxf +fate-mxf-opatom-user-comments: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-1.mxf -an -vcodec copy -metadata "comment_test=value" -fflags +bitexact -f mxf_opatom + FATE_MXF-$(CONFIG_MXF_DEMUXER) += $(FATE_MXF) FATE_SAMPLES_AVCONV += $(FATE_MXF-yes) $(FATE_MXF_REEL_NAME-yes) +FATE_SAMPLES_AVCONV += $(FATE_MXF_USER_COMMENTS-yes) $(FATE_MXF_D10_USER_COMMENTS-yes) 
$(FATE_MXF_OPATOM_USER_COMMENTS-yes) FATE_SAMPLES_FFPROBE += $(FATE_MXF_PROBE-yes) -fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes) $(FATE_MXF_REEL_NAME-yes) +fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes) $(FATE_MXF_REEL_NAME-yes) $(FATE_MXF_USER_COMMENTS-yes) $(FATE_MXF_D10_USER_COMMENTS-yes) $(FATE_MXF_OPATOM_USER_COMMENTS-yes) diff --git a/tests/ref/fate/mxf-d10-user-comments b/tests/ref/fate/mxf-d10-user-comments new file mode 100644 index 00..e78765020c --- /dev/null +++ b/tests/ref/fate/mxf-d10-user-comments @@ -0,0 +1 @@ +b659c1204f8d04e2a5607af083590dca diff --git a/tests/ref/fate/mxf-opatom-user-comments b/tests/ref/fate/mxf-opatom-user-comments new file mode 100644 index 00..1834b9e074 --- /dev/null +++ b/tests/ref/fate/mxf-opatom-user-comments @@ -0,0 +1 @@ +892cf02e44bf7d61b6d6f01e41db9375 diff --git a/tests/ref/fate/mxf-user-comments b/tests/ref/fate/mxf-user-comments new file mode 100644 index 00..4b734a0f85 --- /dev/null +++ b/tests/ref/fate/mxf-user-comments @@ -0,0 +1 @@ +683bacb0105e5bc5bbf46aa430c644d1 -- 2.21.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH v2 1/2] avformat/mxfenc: allow user comments for opatom muxer
From: Mark Reid --- doc/muxers.texi | 4 ++-- libavformat/mxfenc.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/muxers.texi b/doc/muxers.texi index 372fab2f92..aac7d94edf 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -1629,7 +1629,7 @@ ffmpeg -i file.mpg -c copy \ out.ts @end example -@section mxf, mxf_d10 +@section mxf, mxf_d10, mxf_opatom MXF muxer. @@ -1641,7 +1641,7 @@ The muxer options are: @item store_user_comments @var{bool} Set if user comments should be stored if available or never. IRT D-10 does not allow user comments. The default is thus to write them for -mxf but not for mxf_d10 +mxf and mxf_opatom but not for mxf_d10 @end table @section null diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c index 032ee3bf3d..8c6db94865 100644 --- a/libavformat/mxfenc.c +++ b/libavformat/mxfenc.c @@ -3095,6 +3095,8 @@ static const AVOption opatom_options[] = { { "mxf_audio_edit_rate", "Audio edit rate for timecode", offsetof(MXFContext, audio_edit_rate), AV_OPT_TYPE_RATIONAL, {.dbl=25}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, MXF_COMMON_OPTIONS +{ "store_user_comments", "", + offsetof(MXFContext, store_user_comments), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM}, { NULL }, }; -- 2.21.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] avformat/mxfenc: allow user comments for opatom muxer
From: Mark Reid This patch restores the ability to add user comments for the mxf_opatom muxer. The ability seems to have been disabled in d9726893f31. --- doc/muxers.texi | 2 +- libavformat/mxfenc.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/muxers.texi b/doc/muxers.texi index 372fab2f92..764102bf4b 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -1629,7 +1629,7 @@ ffmpeg -i file.mpg -c copy \ out.ts @end example -@section mxf, mxf_d10 +@section mxf, mxf_d10, mxf_opatom MXF muxer. diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c index 032ee3bf3d..8c6db94865 100644 --- a/libavformat/mxfenc.c +++ b/libavformat/mxfenc.c @@ -3095,6 +3095,8 @@ static const AVOption opatom_options[] = { { "mxf_audio_edit_rate", "Audio edit rate for timecode", offsetof(MXFContext, audio_edit_rate), AV_OPT_TYPE_RATIONAL, {.dbl=25}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, MXF_COMMON_OPTIONS +{ "store_user_comments", "", + offsetof(MXFContext, store_user_comments), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM}, { NULL }, }; -- 2.18.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel