from:"\"mindmark\""

[FFmpeg-devel] [PATCH v3 4/4] swscale/bswapdsp: copy over bswapdsp from avcodec

2022-12-12 Thread mindmark

From: Mark Reid 

There are some places in input.c that could use it too,
but they aren't currently being passed the SwsContext
---
 libswscale/Makefile  |   1 +
 libswscale/bswapdsp.c|  59 
 libswscale/bswapdsp.h|  66 +
 libswscale/output.c  |  36 +++
 libswscale/riscv/Makefile|   7 +-
 libswscale/riscv/bswapdsp_init.c |  46 +
 libswscale/riscv/bswapdsp_rvb.S  |  68 +
 libswscale/riscv/bswapdsp_rvv.S  |  62 
 libswscale/swscale_internal.h|   3 +
 libswscale/swscale_unscaled.c|  26 ++---
 libswscale/utils.c   |   2 +
 libswscale/x86/Makefile  |   6 +-
 libswscale/x86/bswapdsp.asm  | 157 +++
 libswscale/x86/bswapdsp_init.c   |  40 
 14 files changed, 537 insertions(+), 42 deletions(-)
 create mode 100644 libswscale/bswapdsp.c
 create mode 100644 libswscale/bswapdsp.h
 create mode 100644 libswscale/riscv/bswapdsp_init.c
 create mode 100644 libswscale/riscv/bswapdsp_rvb.S
 create mode 100644 libswscale/riscv/bswapdsp_rvv.S
 create mode 100644 libswscale/x86/bswapdsp.asm
 create mode 100644 libswscale/x86/bswapdsp_init.c

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 757997b401..4a916739c3 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -6,6 +6,7 @@ HEADERS = swscale.h 
\
   version_major.h   \
 
 OBJS = alphablend.o \
+   bswapdsp.o   \
hscale.o \
hscale_fast_bilinear.o   \
gamma.o  \
diff --git a/libswscale/bswapdsp.c b/libswscale/bswapdsp.c
new file mode 100644
index 0000000000..a164d89a76
--- /dev/null
+++ b/libswscale/bswapdsp.c
@@ -0,0 +1,59 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/bswap.h"
+#include "bswapdsp.h"
+
+static void bswap32_buf(uint32_t *dst, const uint32_t *src, int len)
+{
+int i;
+
+for (i = 0; i + 8 <= len; i += 8) {
+dst[i + 0] = av_bswap32(src[i + 0]);
+dst[i + 1] = av_bswap32(src[i + 1]);
+dst[i + 2] = av_bswap32(src[i + 2]);
+dst[i + 3] = av_bswap32(src[i + 3]);
+dst[i + 4] = av_bswap32(src[i + 4]);
+dst[i + 5] = av_bswap32(src[i + 5]);
+dst[i + 6] = av_bswap32(src[i + 6]);
+dst[i + 7] = av_bswap32(src[i + 7]);
+}
+for (; i < len; i++)
+dst[i + 0] = av_bswap32(src[i + 0]);
+}
+
+static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
+{
+while (len--)
+*dst++ = av_bswap16(*src++);
+}
+
+av_cold void ff_sws_bswapdsp_init(BswapDSPContext *c)
+{
+c->bswap32_buf = bswap32_buf;
+c->bswap16_buf = bswap16_buf;
+
+#if ARCH_RISCV
+ff_sws_bswapdsp_init_riscv(c);
+#elif ARCH_X86
+ff_sws_bswapdsp_init_x86(c);
+#endif
+}
diff --git a/libswscale/bswapdsp.h b/libswscale/bswapdsp.h
new file mode 100644
index 0000000000..f2e12d1b8f
--- /dev/null
+++ b/libswscale/bswapdsp.h
@@ -0,0 +1,66 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWSCALE_BSWAPDSP_H
+#define SWSCALE_BSWAPDSP_H
+
+#include <stdint.h>
+
+/**
+ * @file
+ * Optimized buffer byte swapping routines.
+ */
+
+typedef struct BswapDSPContext {
+/**
+ * Byte swap 32 bit elements in

[FFmpeg-devel] [PATCH v3 3/4] avcodec/bswapdsp: add documentation

2022-12-12 Thread mindmark

From: Mark Reid 

---
 libavcodec/bswapdsp.h | 33 +
 1 file changed, 33 insertions(+)

diff --git a/libavcodec/bswapdsp.h b/libavcodec/bswapdsp.h
index 10519cfd2e..ecc37d2c56 100644
--- a/libavcodec/bswapdsp.h
+++ b/libavcodec/bswapdsp.h
@@ -21,11 +21,44 @@
 
 #include <stdint.h>
 
+/**
+ * @file
+ * Optimized buffer byte swapping routines.
+ */
+
 typedef struct BswapDSPContext {
+/**
+ * Byte swap 32 bit elements in a buffer.
+ *
+ * @param dst Destination buffer.
+ * @param src Source buffer, may be the same as dst.
+ * @param len The number of elements in the buffer.
+ *
+ */
+/** @{ */
 void (*bswap32_buf)(uint32_t *dst, const uint32_t *src, int len);
+/** @} */
+
+/**
+ * Byte swap 16 bit elements in a buffer.
+ *
+ * @param dst Destination buffer.
+ * @param src Source buffer, may be the same as dst.
+ * @param len The number of elements in the buffer.
+ *
+ */
+/** @{ */
 void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
+/** @} */
 } BswapDSPContext;
 
+
+/**
+ * Initialize BswapDSPContext function pointers.
+ *
+ * @param c pointer to BswapDSPContext
+ *
+ */
 void ff_bswapdsp_init(BswapDSPContext *c);
 void ff_bswapdsp_init_riscv(BswapDSPContext *c);
 void ff_bswapdsp_init_x86(BswapDSPContext *c);
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v3 2/4] avcodec/bswapdsp: rename bswap_buf to bswap32_buf

2022-12-12 Thread mindmark

From: Mark Reid 

---
 libavcodec/4xm.c |  8 
 libavcodec/alsdec.c  |  6 +++---
 libavcodec/apedec.c  |  4 ++--
 libavcodec/asvdec.c  |  4 ++--
 libavcodec/asvenc.c  |  4 ++--
 libavcodec/bswapdsp.c|  8 
 libavcodec/bswapdsp.h|  2 +-
 libavcodec/eatqi.c   |  4 ++--
 libavcodec/fraps.c   |  4 ++--
 libavcodec/huffyuvdec.c  |  8 
 libavcodec/huffyuvenc.c  |  2 +-
 libavcodec/imm4.c|  6 +++---
 libavcodec/mimic.c   |  6 +++---
 libavcodec/motionpixels.c|  4 ++--
 libavcodec/mpc7.c|  4 ++--
 libavcodec/rawdec.c  |  2 +-
 libavcodec/riscv/bswapdsp_init.c |  4 ++--
 libavcodec/truemotion2.c |  4 ++--
 libavcodec/truespeech.c  |  2 +-
 libavcodec/utvideodec.c  | 12 ++--
 libavcodec/utvideoenc.c  |  6 +++---
 libavcodec/x86/bswapdsp.asm  |  2 +-
 libavcodec/x86/bswapdsp_init.c   |  6 +++---
 libavcodec/ylc.c | 12 ++--
 tests/checkasm/bswapdsp.c|  2 +-
 25 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index 5636fdef2d..a26b2f9004 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -469,8 +469,8 @@ static int decode_p_frame(FourXContext *f, const uint8_t 
*buf, int length)
   bitstream_size);
 if (!f->bitstream_buffer)
 return AVERROR(ENOMEM);
-f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
-   bitstream_size / 4);
+f->bbdsp.bswap32_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
+ bitstream_size / 4);
 init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 
 wordstream_offset = extra + bitstream_size;
@@ -813,8 +813,8 @@ static int decode_i_frame(FourXContext *f, const uint8_t 
*buf, int length)
   prestream_size);
 if (!f->bitstream_buffer)
 return AVERROR(ENOMEM);
-f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream,
-   prestream_size / 4);
+f->bbdsp.bswap32_buf(f->bitstream_buffer, (const uint32_t *) prestream,
+ prestream_size / 4);
 init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 
 f->last_dc = 0 * 128 * 8 * 8;
diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c
index 4605b2248f..6f401cf890 100644
--- a/libavcodec/alsdec.c
+++ b/libavcodec/alsdec.c
@@ -1899,9 +1899,9 @@ static int decode_frame(AVCodecContext *avctx, AVFrame 
*frame,
  sample++)
 *dest++ = av_bswap16(src[sample]);
 } else {
-ctx->bdsp.bswap_buf((uint32_t *) ctx->crc_buffer,
-(uint32_t *) frame->data[0],
-ctx->cur_frame_length * channels);
+ctx->bdsp.bswap32_buf((uint32_t *) ctx->crc_buffer,
+  (uint32_t *) frame->data[0],
+  ctx->cur_frame_length * channels);
 }
 crc_source = ctx->crc_buffer;
 } else {
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index c08d13d6c2..b7856c82e5 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -1499,8 +1499,8 @@ static int ape_decode_frame(AVCodecContext *avctx, 
AVFrame *frame,
 av_fast_padded_malloc(&s->data, &s->data_size, buf_size);
 if (!s->data)
 return AVERROR(ENOMEM);
-s->bdsp.bswap_buf((uint32_t *) s->data, (const uint32_t *) buf,
-  buf_size >> 2);
+s->bdsp.bswap32_buf((uint32_t *) s->data, (const uint32_t *) buf,
+buf_size >> 2);
 memset(s->data + (buf_size & ~3), 0, buf_size & 3);
 s->ptr = s->data;
 s->data_end = s->data + buf_size;
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c
index 699aab9f8f..25dab7473f 100644
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -253,8 +253,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 if (!a->bitstream_buffer)
 return AVERROR(ENOMEM);
 
-c->bbdsp.bswap_buf((uint32_t *) a->bitstream_buffer,
-   (const uint32_t *) buf, buf_size / 4);
+c->bbdsp.bswap32_buf((uint32_t *) a->bitstream_buffer,
+ (const uint32_t *) buf, buf_size / 4);
 ret = init_get_bits8(&a->gb, a->bitstream_buffer, buf_size);
 } else {
 ret = init_get_bits8_le(&a->gb, buf, buf_size);
diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index 9da7cbb986..2bf67d686c 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -309,8 +309,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket 
*pkt,

[FFmpeg-devel] [PATCH v3 1/4] avcodec/bswapdsp: remove unused cextern

2022-12-12 Thread mindmark

From: Mark Reid 

---
 libavcodec/x86/bswapdsp.asm | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 31c6c48a21..2aa235e13c 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -26,8 +26,6 @@
 SECTION_RODATA
 pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 
-cextern pb_80
-
 SECTION .text
 
 ; %1 = aligned/unaligned
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2 1/2] avutil: move bswapdsp from avcodec to avutil

2022-12-05 Thread mindmark

From: Mark Reid 

Also renamed bswap_buf to bswap32_buf
---
 configure | 56 ---
 libavcodec/4xm.c  | 14 ++---
 libavcodec/Makefile   |  1 -
 libavcodec/ac3dec.c   |  4 +-
 libavcodec/ac3dec.h   |  4 +-
 libavcodec/alsdec.c   | 12 ++--
 libavcodec/apedec.c   | 10 ++--
 libavcodec/asv.c  |  4 +-
 libavcodec/asv.h  |  4 +-
 libavcodec/asvdec.c   |  4 +-
 libavcodec/asvenc.c   |  4 +-
 libavcodec/cllc.c |  6 +-
 libavcodec/eamad.c|  6 +-
 libavcodec/eatqi.c| 10 ++--
 libavcodec/exr.c  |  6 +-
 libavcodec/flacenc.c  |  6 +-
 libavcodec/fraps.c| 10 ++--
 libavcodec/hevcdec.c  |  4 +-
 libavcodec/hevcdec.h  |  4 +-
 libavcodec/huffyuvdec.c   | 14 ++---
 libavcodec/huffyuvenc.c   |  8 +--
 libavcodec/imc.c  |  6 +-
 libavcodec/imm4.c | 12 ++--
 libavcodec/mdec.c |  6 +-
 libavcodec/mimic.c| 12 ++--
 libavcodec/mobiclip.c |  6 +-
 libavcodec/motionpixels.c | 10 ++--
 libavcodec/mpc.h  |  4 +-
 libavcodec/mpc7.c |  8 +--
 libavcodec/rawdec.c   |  8 +--
 libavcodec/riscv/Makefile |  3 -
 libavcodec/shorten.c  |  7 ++-
 libavcodec/truemotion2.c  | 11 ++--
 libavcodec/truespeech.c   |  8 +--
 libavcodec/utvideo.h  |  4 +-
 libavcodec/utvideodec.c   | 16 +++---
 libavcodec/utvideoenc.c   | 10 ++--
 libavcodec/x86/Makefile   |  2 -
 libavcodec/ylc.c  | 18 +++---
 libavutil/Makefile|  2 +
 {libavcodec => libavutil}/bswapdsp.c  | 17 +++---
 {libavcodec => libavutil}/bswapdsp.h  | 16 +++---
 libavutil/riscv/Makefile  |  7 ++-
 .../riscv/bswapdsp_init.c |  9 +--
 .../riscv/bswapdsp_rvb.S  |  0
 .../riscv/bswapdsp_rvv.S  |  0
 libavutil/version.h   |  2 +-
 libavutil/x86/Makefile|  8 ++-
 {libavcodec => libavutil}/x86/bswapdsp.asm|  4 +-
 {libavcodec => libavutil}/x86/bswapdsp_init.c | 17 +++---
 tests/checkasm/Makefile   |  2 +-
 tests/checkasm/bswapdsp.c |  8 +--
 tests/checkasm/checkasm.c |  4 +-
 53 files changed, 216 insertions(+), 222 deletions(-)
 rename {libavcodec => libavutil}/bswapdsp.c (80%)
 rename {libavcodec => libavutil}/bswapdsp.h (71%)
 rename {libavcodec => libavutil}/riscv/bswapdsp_init.c (85%)
 rename {libavcodec => libavutil}/riscv/bswapdsp_rvb.S (100%)
 rename {libavcodec => libavutil}/riscv/bswapdsp_rvv.S (100%)
 rename {libavcodec => libavutil}/x86/bswapdsp.asm (97%)
 rename {libavcodec => libavutil}/x86/bswapdsp_init.c (80%)

diff --git a/configure b/configure
index f4eedfc207..03b502ea37 100755
--- a/configure
+++ b/configure
@@ -2421,7 +2421,6 @@ CONFIG_EXTRA="
 audio_frame_queue
 audiodsp
 blockdsp
-bswapdsp
 cabac
 cbs
 cbs_av1
@@ -2749,8 +2748,8 @@ aac_decoder_select="adts_header mpeg4audio sinewin"
 aac_fixed_decoder_select="adts_header mpeg4audio"
 aac_encoder_select="audio_frame_queue iirfilter lpc sinewin"
 aac_latm_decoder_select="aac_decoder aac_latm_parser"
-ac3_decoder_select="ac3_parser ac3dsp bswapdsp fmtconvert"
-ac3_fixed_decoder_select="ac3_parser ac3dsp bswapdsp"
+ac3_decoder_select="ac3_parser ac3dsp fmtconvert"
+ac3_fixed_decoder_select="ac3_parser ac3dsp"
 ac3_encoder_select="ac3dsp audiodsp me_cmp"
 ac3_fixed_encoder_select="ac3dsp audiodsp me_cmp"
 acelp_kelvin_decoder_select="audiodsp"
@@ -2759,20 +2758,20 @@ adpcm_g722_encoder_select="g722dsp"
 agm_decoder_select="idctdsp"
 aic_decoder_select="golomb idctdsp"
 alac_encoder_select="lpc"
-als_decoder_select="bswapdsp mpeg4audio"
+als_decoder_select="mpeg4audio"
 amrnb_decoder_select="lsp"
 amrwb_decoder_select="lsp"
 amv_decoder_select="sp5x_decoder exif"
 amv_encoder_select="jpegtables mpegvideoenc"
-ape_decoder_select="bswapdsp llauddsp"
+ape_decoder_select="llauddsp"
 apng_decoder_select="inflate_wrapper"
 apng_encoder_select="deflate_wrapper llvidencdsp"
 aptx_encoder_select="audio_frame_queue"
 aptx_hd_encoder_select="audio_frame_queue"
-asv1_decoder_select="blockdsp bswapdsp idctdsp"
-asv1_enc

[FFmpeg-devel] [PATCH v2 2/2] libswscale: add AVBSwapDSPContext and use

2022-12-05 Thread mindmark

From: Mark Reid 

There are some places in input.c that could use it too,
but they aren't currently being passed the SwsContext
---
 libswscale/output.c   | 36 +++
 libswscale/swscale_internal.h |  3 +++
 libswscale/swscale_unscaled.c | 26 +
 libswscale/utils.c|  2 ++
 4 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..cd44081e3d 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2313,13 +2313,11 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
-for (i = 0; i < dstW; i++) {
-dest16[0][i] = av_bswap16(dest16[0][i]);
-dest16[1][i] = av_bswap16(dest16[1][i]);
-dest16[2][i] = av_bswap16(dest16[2][i]);
-if (hasAlpha)
-dest16[3][i] = av_bswap16(dest16[3][i]);
-}
+c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW);
+c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW);
+c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW);
+if (hasAlpha)
+c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW);
 }
 }
 
@@ -2385,13 +2383,11 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 dest16[3][i] = av_clip_uintp2(A, 30) >> 14;
 }
 if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
-for (i = 0; i < dstW; i++) {
-dest16[0][i] = av_bswap16(dest16[0][i]);
-dest16[1][i] = av_bswap16(dest16[1][i]);
-dest16[2][i] = av_bswap16(dest16[2][i]);
-if (hasAlpha)
-dest16[3][i] = av_bswap16(dest16[3][i]);
-}
+c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW);
+c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW);
+c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW);
+if (hasAlpha)
+c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW);
 }
 }
 
@@ -2461,13 +2457,11 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 
30) >> 14));
 }
 if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
-for (i = 0; i < dstW; i++) {
-dest32[0][i] = av_bswap32(dest32[0][i]);
-dest32[1][i] = av_bswap32(dest32[1][i]);
-dest32[2][i] = av_bswap32(dest32[2][i]);
-if (hasAlpha)
-dest32[3][i] = av_bswap32(dest32[3][i]);
-}
+c->bsdsp.bswap32_buf(dest32[0], dest32[0], dstW);
+c->bsdsp.bswap32_buf(dest32[1], dest32[1], dstW);
+c->bsdsp.bswap32_buf(dest32[2], dest32[2], dstW);
+if (hasAlpha)
+c->bsdsp.bswap32_buf(dest32[3], dest32[3], dstW);
 }
 }
 
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index abeebbb002..400f0bc8ed 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -26,6 +26,7 @@
 #include "config.h"
 
 #include "libavutil/avassert.h"
+#include "libavutil/bswapdsp.h"
 #include "libavutil/common.h"
 #include "libavutil/frame.h"
 #include "libavutil/intreadwrite.h"
@@ -682,6 +683,8 @@ typedef struct SwsContext {
 atomic_int   data_unaligned_warned;
 
 Half2FloatTables *h2f_tables;
+
+AVBSwapDSPContext bsdsp;
 } SwsContext;
 //FIXME check init (where 0)
 
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 9af2e7ecc3..0010ab24d1 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -468,7 +468,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[],
   int srcStride[], int srcSliceY, int srcSliceH,
   uint8_t *dst[], int dstStride[])
 {
-int i, j, p;
+int i, p;
 
 for (p = 0; p < 4; p++) {
 int srcstr = srcStride[p] / 2;
@@ -480,9 +480,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[],
 continue;
 dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr;
 for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) {
-for (j = 0; j < min_stride; j++) {
-dstPtr[j] = av_bswap16(srcPtr[j]);
-}
+c->bsdsp.bswap16_buf(dstPtr, srcPtr, min_stride);
 srcPtr += srcstr;
 dstPtr += dststr;
 }
@@ -495,7 +493,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t *src[],
   int srcStride[], int srcSliceY, int srcSliceH,
   uint8_t *dst[], int dstStride[])
 {
-int i, j, p;
+int i, p;
 
 for (p = 0; p < 4; p++) {
 int srcstr = srcStride[p] / 4;
@@ -507,9 +505,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t *src[],
 continue;
 dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr;
 for (i = 0; i < (srcSliceH >> c

[FFmpeg-devel] [PATCH 1/2] avutil: move bswapdsp from avcodec to avutil

2022-12-04 Thread mindmark

From: Mark Reid 

Also renamed bswap_buf to bswap32_buf
---
 configure | 56 ---
 libavcodec/4xm.c  | 14 ++---
 libavcodec/Makefile   |  1 -
 libavcodec/ac3dec.c   |  4 +-
 libavcodec/ac3dec.h   |  4 +-
 libavcodec/alsdec.c   | 12 ++--
 libavcodec/apedec.c   | 10 ++--
 libavcodec/asv.c  |  4 +-
 libavcodec/asv.h  |  4 +-
 libavcodec/asvdec.c   |  4 +-
 libavcodec/asvenc.c   |  4 +-
 libavcodec/cllc.c |  6 +-
 libavcodec/eamad.c|  6 +-
 libavcodec/eatqi.c| 10 ++--
 libavcodec/exr.c  |  6 +-
 libavcodec/flacenc.c  |  6 +-
 libavcodec/fraps.c| 10 ++--
 libavcodec/hevcdec.c  |  4 +-
 libavcodec/hevcdec.h  |  4 +-
 libavcodec/huffyuvdec.c   | 14 ++---
 libavcodec/huffyuvenc.c   |  8 +--
 libavcodec/imc.c  |  6 +-
 libavcodec/imm4.c | 12 ++--
 libavcodec/mdec.c |  6 +-
 libavcodec/mimic.c| 12 ++--
 libavcodec/mobiclip.c |  6 +-
 libavcodec/motionpixels.c | 10 ++--
 libavcodec/mpc.h  |  4 +-
 libavcodec/mpc7.c |  8 +--
 libavcodec/rawdec.c   |  8 +--
 libavcodec/riscv/Makefile |  3 -
 libavcodec/shorten.c  |  7 ++-
 libavcodec/truemotion2.c  | 11 ++--
 libavcodec/truespeech.c   |  8 +--
 libavcodec/utvideo.h  |  4 +-
 libavcodec/utvideodec.c   | 16 +++---
 libavcodec/utvideoenc.c   | 10 ++--
 libavcodec/x86/Makefile   |  2 -
 libavcodec/ylc.c  | 18 +++---
 libavutil/Makefile|  2 +
 {libavcodec => libavutil}/bswapdsp.c  | 17 +++---
 {libavcodec => libavutil}/bswapdsp.h  | 16 +++---
 libavutil/riscv/Makefile  |  7 ++-
 .../riscv/bswapdsp_init.c |  9 +--
 .../riscv/bswapdsp_rvb.S  |  0
 .../riscv/bswapdsp_rvv.S  |  0
 libavutil/version.h   |  2 +-
 libavutil/x86/Makefile|  8 ++-
 {libavcodec => libavutil}/x86/bswapdsp.asm|  2 +-
 {libavcodec => libavutil}/x86/bswapdsp_init.c | 17 +++---
 tests/checkasm/Makefile   |  2 +-
 tests/checkasm/bswapdsp.c |  8 +--
 tests/checkasm/checkasm.c |  4 +-
 53 files changed, 216 insertions(+), 220 deletions(-)
 rename {libavcodec => libavutil}/bswapdsp.c (80%)
 rename {libavcodec => libavutil}/bswapdsp.h (71%)
 rename {libavcodec => libavutil}/riscv/bswapdsp_init.c (85%)
 rename {libavcodec => libavutil}/riscv/bswapdsp_rvb.S (100%)
 rename {libavcodec => libavutil}/riscv/bswapdsp_rvv.S (100%)
 rename {libavcodec => libavutil}/x86/bswapdsp.asm (98%)
 rename {libavcodec => libavutil}/x86/bswapdsp_init.c (80%)

diff --git a/configure b/configure
index f4eedfc207..03b502ea37 100755
--- a/configure
+++ b/configure
@@ -2421,7 +2421,6 @@ CONFIG_EXTRA="
 audio_frame_queue
 audiodsp
 blockdsp
-bswapdsp
 cabac
 cbs
 cbs_av1
@@ -2749,8 +2748,8 @@ aac_decoder_select="adts_header mpeg4audio sinewin"
 aac_fixed_decoder_select="adts_header mpeg4audio"
 aac_encoder_select="audio_frame_queue iirfilter lpc sinewin"
 aac_latm_decoder_select="aac_decoder aac_latm_parser"
-ac3_decoder_select="ac3_parser ac3dsp bswapdsp fmtconvert"
-ac3_fixed_decoder_select="ac3_parser ac3dsp bswapdsp"
+ac3_decoder_select="ac3_parser ac3dsp fmtconvert"
+ac3_fixed_decoder_select="ac3_parser ac3dsp"
 ac3_encoder_select="ac3dsp audiodsp me_cmp"
 ac3_fixed_encoder_select="ac3dsp audiodsp me_cmp"
 acelp_kelvin_decoder_select="audiodsp"
@@ -2759,20 +2758,20 @@ adpcm_g722_encoder_select="g722dsp"
 agm_decoder_select="idctdsp"
 aic_decoder_select="golomb idctdsp"
 alac_encoder_select="lpc"
-als_decoder_select="bswapdsp mpeg4audio"
+als_decoder_select="mpeg4audio"
 amrnb_decoder_select="lsp"
 amrwb_decoder_select="lsp"
 amv_decoder_select="sp5x_decoder exif"
 amv_encoder_select="jpegtables mpegvideoenc"
-ape_decoder_select="bswapdsp llauddsp"
+ape_decoder_select="llauddsp"
 apng_decoder_select="inflate_wrapper"
 apng_encoder_select="deflate_wrapper llvidencdsp"
 aptx_encoder_select="audio_frame_queue"
 aptx_hd_encoder_select="audio_frame_queue"
-asv1_decoder_select="blockdsp bswapdsp idctdsp"
-asv1_enc

[FFmpeg-devel] [PATCH 2/2] libswscale: add AVBSwapDSPContext and use

2022-12-04 Thread mindmark

From: Mark Reid 

There are some places in input.c that could use it too,
but they aren't currently being passed the SwsContext
---
 libswscale/output.c   | 36 +++
 libswscale/swscale_internal.h |  3 +++
 libswscale/swscale_unscaled.c | 26 +
 libswscale/utils.c|  2 ++
 4 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..cd44081e3d 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2313,13 +2313,11 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
-for (i = 0; i < dstW; i++) {
-dest16[0][i] = av_bswap16(dest16[0][i]);
-dest16[1][i] = av_bswap16(dest16[1][i]);
-dest16[2][i] = av_bswap16(dest16[2][i]);
-if (hasAlpha)
-dest16[3][i] = av_bswap16(dest16[3][i]);
-}
+c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW);
+c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW);
+c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW);
+if (hasAlpha)
+c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW);
 }
 }
 
@@ -2385,13 +2383,11 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 dest16[3][i] = av_clip_uintp2(A, 30) >> 14;
 }
 if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
-for (i = 0; i < dstW; i++) {
-dest16[0][i] = av_bswap16(dest16[0][i]);
-dest16[1][i] = av_bswap16(dest16[1][i]);
-dest16[2][i] = av_bswap16(dest16[2][i]);
-if (hasAlpha)
-dest16[3][i] = av_bswap16(dest16[3][i]);
-}
+c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW);
+c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW);
+c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW);
+if (hasAlpha)
+c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW);
 }
 }
 
@@ -2461,13 +2457,11 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 
30) >> 14));
 }
 if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
-for (i = 0; i < dstW; i++) {
-dest32[0][i] = av_bswap32(dest32[0][i]);
-dest32[1][i] = av_bswap32(dest32[1][i]);
-dest32[2][i] = av_bswap32(dest32[2][i]);
-if (hasAlpha)
-dest32[3][i] = av_bswap32(dest32[3][i]);
-}
+c->bsdsp.bswap32_buf(dest32[0], dest32[0], dstW);
+c->bsdsp.bswap32_buf(dest32[1], dest32[1], dstW);
+c->bsdsp.bswap32_buf(dest32[2], dest32[2], dstW);
+if (hasAlpha)
+c->bsdsp.bswap32_buf(dest32[3], dest32[3], dstW);
 }
 }
 
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index abeebbb002..400f0bc8ed 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -26,6 +26,7 @@
 #include "config.h"
 
 #include "libavutil/avassert.h"
+#include "libavutil/bswapdsp.h"
 #include "libavutil/common.h"
 #include "libavutil/frame.h"
 #include "libavutil/intreadwrite.h"
@@ -682,6 +683,8 @@ typedef struct SwsContext {
 atomic_int   data_unaligned_warned;
 
 Half2FloatTables *h2f_tables;
+
+AVBSwapDSPContext bsdsp;
 } SwsContext;
 //FIXME check init (where 0)
 
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 9af2e7ecc3..0010ab24d1 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -468,7 +468,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[],
   int srcStride[], int srcSliceY, int srcSliceH,
   uint8_t *dst[], int dstStride[])
 {
-int i, j, p;
+int i, p;
 
 for (p = 0; p < 4; p++) {
 int srcstr = srcStride[p] / 2;
@@ -480,9 +480,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[],
 continue;
 dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr;
 for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) {
-for (j = 0; j < min_stride; j++) {
-dstPtr[j] = av_bswap16(srcPtr[j]);
-}
+c->bsdsp.bswap16_buf(dstPtr, srcPtr, min_stride);
 srcPtr += srcstr;
 dstPtr += dststr;
 }
@@ -495,7 +493,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t *src[],
   int srcStride[], int srcSliceY, int srcSliceH,
   uint8_t *dst[], int dstStride[])
 {
-int i, j, p;
+int i, p;
 
 for (p = 0; p < 4; p++) {
 int srcstr = srcStride[p] / 4;
@@ -507,9 +505,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t *src[],
 continue;
 dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr;
 for (i = 0; i < (srcSliceH >> c

[FFmpeg-devel] [PATCH v5 4/4] swscale/output: add rgbaf32 output support

2022-11-23 Thread mindmark

From: Mark Reid 

---
 libswscale/output.c  | 92 
 libswscale/swscale_unscaled.c|  4 +-
 libswscale/tests/floatimg_cmp.c  |  4 +-
 libswscale/utils.c   | 16 +++--
 libswscale/yuv2rgb.c |  2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |  1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 tests/ref/fate/sws-floatimg-cmp  | 16 +
 20 files changed, 170 insertions(+), 8 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..1d86a244f9 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 
+static void
+yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+const int16_t **lumSrcx, int lumFilterSize,
+const int16_t *chrFilter, const int16_t **chrUSrcx,
+const int16_t **chrVSrcx, int chrFilterSize,
+const int16_t **alpSrcx, uint8_t *dest,
+int dstW, int y)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+int i;
+int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
+int hasAlpha = alpha && alpSrcx;
+int pixelStep = alpha ? 4 : 3;
+uint32_t *dest32 = (uint32_t*)dest;
+const int32_t **lumSrc  = (const int32_t**)lumSrcx;
+const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+static const float float_mult = 1.0f / 65535.0f;
+uint32_t a = av_float2int(1.0f);
+
+for (i = 0; i < dstW; i++) {
+int j;
+int Y = -0x40000000;
+int U = -(128 << 23);
+int V = -(128 << 23);
+int R, G, B, A;
+
+for (j = 0; j < lumFilterSize; j++)
+Y += lumSrc[j][i] * (unsigned)lumFilter[j];
+
+for (j = 0; j < chrFilterSize; j++) {
+U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+}
+
+Y >>= 14;
+Y += 0x10000;
+U >>= 14;
+V >>= 14;
+
+if (hasAlpha) {
+A = -0x40000000;
+
+for (j = 0; j < lumFilterSize; j++)
+A += alpSrc[j][i] * (unsigned)lumFilter[j];
+
+A >>= 1;
+A += 0x20002000;
+a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14));
+}
+
+Y -= c->yuv2rgb_y_offset;
+Y *= c->yuv2rgb_y_coeff;
+Y += (1 << 13) - (1 << 29);
+R = V * c->yuv2rgb_v2r_coeff;
+G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+B =U * c->yuv2rgb_u2b_coeff;
+
+R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16);
+G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16);
+B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16);
+
+dest32[0] = av_float2int(float_mult * (float)R);
+dest32[1] = av_float2int(float_mult * (float)G);
+dest32[2] = av_float2int(float_mult * (float)B);
+if (alpha)
+dest32[3] = a;
+
+dest32 += pixelStep;
+}
+if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+dest32 = (uint32_t*)dest;
+for (i = 0; i < dstW; i++) {
+dest32[0] = av_bswap32(dest32[0]);
+dest32[1] = av_bswap32(dest32[1]);
+dest32[2] = av_bswap32(dest32[2]);
+if (alpha)
+dest32[3] = av_bswap32(dest32[3]);
+
+dest32 += pixelStep;
+}
+}
+
+}
+
 static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
 const int16_t *ubuf[2], const int16_t *vbuf[2],
@@ -2983,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 }
 break;
 
+case AV_PIX_FMT_RGBF32LE:
+case AV_PIX_FMT_RGBF32BE:
+case AV_PIX_FMT_RGBAF32LE:
+case AV_PIX_FMT_RGBAF32BE:
+*yuv2packedX = yuv2rgbaf32_full_X_c;
+break;
 case AV_PIX_FMT_RGB24:

[FFmpeg-devel] [PATCH v5 3/4] avfilter/vf_transpose: add support for packed rgb float formats

2022-11-23 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_transpose.c | 44 ++
 1 file changed, 44 insertions(+)

diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 469e66729f..1023d6fe82 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t 
src_linesize,
 transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8);
 }
 
+static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize,
+int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 12) {
+for (x = 0; x < w; x++) {
+*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + 
x*src_linesize));
+*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + 
x*src_linesize));
+*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize,
+   uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
+
+static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize,
+ uint8_t *dst, ptrdiff_t dst_linesize,
+ int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 16) {
+for (x = 0; x < w; x++) {
+*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + 
x*src_linesize));
+*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
 static int config_props_output(AVFilterLink *outlink)
 {
 AVFilterContext *ctx = outlink->src;
@@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink)
 v->transpose_8x8   = transpose_8x8_48_c; break;
 case 8: v->transpose_block = transpose_block_64_c;
 v->transpose_8x8   = transpose_8x8_64_c; break;
+case 12: v->transpose_block = transpose_block_96_c;
+ v->transpose_8x8   = transpose_8x8_96_c; break;
+case 16: v->transpose_block = transpose_block_128_c;
+ v->transpose_8x8   = transpose_8x8_128_c; break;
 }
 }
 
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v5 2/4] avfilter/vf_hflip: add support for packed rgb float formats

2022-11-23 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_hflip_init.h | 25 +
 1 file changed, 25 insertions(+)

diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h
index d0319f463d..31173f73fc 100644
--- a/libavfilter/vf_hflip_init.h
+++ b/libavfilter/vf_hflip_init.h
@@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t 
*ddst, int w)
 dst[j] = src[-j];
 }
 
+static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint32_t *in = (const uint32_t *)ssrc;
+uint32_t *out = (uint32_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 3, in -= 3) {
+out[0] = in[0];
+out[1] = in[1];
+out[2] = in[2];
+}
+}
+
+static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint64_t *in = (const uint64_t *)ssrc;
+uint64_t *out = (uint64_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 2, in -= 2) {
+out[0] = in[0];
+out[1] = in[1];
+}
+}
+
 static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes)
 {
 for (int i = 0; i < nb_planes; i++) {
@@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int 
step[4], int nb_planes)
 case 4: s->flip_line[i] = hflip_dword_c; break;
 case 6: s->flip_line[i] = hflip_b48_c;   break;
 case 8: s->flip_line[i] = hflip_qword_c; break;
+case 12: s->flip_line[i] = hflip_b96_c; break;
+case 16: s->flip_line[i] = hflip_b128_c; break;
 default:
 return AVERROR_BUG;
 }
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v5 1/4] swscale/input: add rgbaf32 input support

2022-11-23 Thread mindmark

From: Mark Reid 

The input functions currently match the planar f32 functions.
They can be refactored to remove the repeated lrintf/av_clipf calls;
this will be addressed in a future patch.
---
 libswscale/input.c | 122 +
 libswscale/utils.c |   6 +++
 2 files changed, 128 insertions(+)

diff --git a/libswscale/input.c b/libswscale/input.c
index d5676062a2..9c9eb31cde 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1284,6 +1284,98 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, 
const uint8_t *_src, cons
 rgbaf16_funcs_endian(le, 0)
 rgbaf16_funcs_endian(be, 1)
 
+#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): 
av_int2float(AV_RL32(&src)))
+
+static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+const float *src, int width,
+int32_t *rgb2yuv, int comp)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+/*TODO: refactor these f32 conversions to only have one lrintf and 
av_clipf call*/
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, int32_t *rgb2yuv, 
int comp)
+{
+int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+int i;
+/*TODO: refactor these f32 conversions to only have one lrintf and 
av_clipf call*/
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, void *opq)
+{
+int i;
+for (i=0; isrcFormat;
@@ -1663,6 +1755,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->chrToYV12 = rgbaf16leToUV_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->chrToYV12 = rgbf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->chrToYV12 = rgbaf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->chrToYV12 = rgbf32leToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->chrToYV12 = rgbaf32leToUV_c;
+break;
 }
 }
 
@@ -1973,6 +2077,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->lumToYV12 = rgbaf16leToY_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->lumToYV12 = rgbf32beToY_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->lumToYV12 = rgbaf32beToY_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->lumToYV12 = rgbf32leToY_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->lumToYV12 = rgbaf32leToY_c;
+break;
 }
 if (c->needAlpha) {
 if (is16BPS(srcFormat) || isNBPS(srcFormat)) {
@@ -1998,6 +2114,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->alpToYV12 = rgbaf16leToA_c;
 break;
+case AV_PIX_FMT_RGBAF32BE:
+c->alpToYV12 = rgbaf32beToA_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->alpToYV12 = rgbaf32leToA_c;
+break;
 case AV_PIX_FMT_YA8:
 c->alpToYV12 = uyvyToY_c;
 break;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 85640a143f..2c520f68d1 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -266,6 +266,10 @@ static const FormatEntry format_entries[] = {
 [AV_PIX_FMT_VUYX]= { 1, 1 },
 [AV_PIX_FMT_RGBAF16BE]   = { 1, 0 },
 [AV_PIX_FMT_RGBAF16LE]   = { 1, 0 },
+[AV_PIX_FMT_RGBF32BE]= { 1, 0 },
+[AV_PIX_FMT_RGBF32LE]= { 1, 0 },
+[AV_PIX_FMT_RGBAF32BE]   = { 1, 0 },
+[AV_PIX_FMT_RGBAF32LE]   = { 1, 0 },
 [AV_PIX_FMT_XV30LE]  = { 1, 1 },
 [AV_PIX_FMT_XV36LE]  = { 1, 1 },
 };
@@ -1572,6 +1576,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilte

[FFmpeg-devel] [PATCH v5 0/4] swscale: rgbaf32 input/output support

2022-11-23 Thread mindmark

From: Mark Reid 

This patch series adds swscale input/output support for the packed rgb float 
formats.
A few of the filters also needed to support the larger 96/128-bit packed pixel 
sizes.

I also plan to eventually add lossless unscaled conversions between the planar 
and packed formats.

changes since v4
* added comment about refactoring input functions
changes since v3
* removed half uv path implementation
changes since v2
* add bias to rgbaf32 output to improve non overflowing range
changes since v1
* output correct alpha if src doesn't have alpha


Mark Reid (4):
  swscale/input: add rgbaf32 input support
  avfilter/vf_hflip: add support for packed rgb float formats
  avfilter/vf_transpose: add support for packed rgb float formats
  swscale/output: add rgbaf32 output support

 libavfilter/vf_hflip_init.h  |  25 +
 libavfilter/vf_transpose.c   |  44 
 libswscale/input.c   | 122 +++
 libswscale/output.c  |  92 +
 libswscale/swscale_unscaled.c|   4 +-
 libswscale/tests/floatimg_cmp.c  |   4 +-
 libswscale/utils.c   |  14 ++-
 libswscale/yuv2rgb.c |   2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |   1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |   1 +
 tests/ref/fate/filter-pixfmts-copy   |   4 +
 tests/ref/fate/filter-pixfmts-crop   |   4 +
 tests/ref/fate/filter-pixfmts-field  |   4 +
 tests/ref/fate/filter-pixfmts-fieldorder |   4 +
 tests/ref/fate/filter-pixfmts-hflip  |   4 +
 tests/ref/fate/filter-pixfmts-il |   4 +
 tests/ref/fate/filter-pixfmts-null   |   4 +
 tests/ref/fate/filter-pixfmts-scale  |   4 +
 tests/ref/fate/filter-pixfmts-transpose  |   4 +
 tests/ref/fate/filter-pixfmts-vflip  |   4 +
 tests/ref/fate/sws-floatimg-cmp  |  16 +++
 23 files changed, 363 insertions(+), 4 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

--
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v4 4/4] swscale/output: add rgbaf32 output support

2022-11-21 Thread mindmark

From: Mark Reid 

---
 libswscale/output.c  | 92 
 libswscale/swscale_unscaled.c|  4 +-
 libswscale/tests/floatimg_cmp.c  |  4 +-
 libswscale/utils.c   | 16 +++--
 libswscale/yuv2rgb.c |  2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |  1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 tests/ref/fate/sws-floatimg-cmp  | 16 +
 20 files changed, 170 insertions(+), 8 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..1d86a244f9 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 
+static void
+yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+const int16_t **lumSrcx, int lumFilterSize,
+const int16_t *chrFilter, const int16_t **chrUSrcx,
+const int16_t **chrVSrcx, int chrFilterSize,
+const int16_t **alpSrcx, uint8_t *dest,
+int dstW, int y)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+int i;
+int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
+int hasAlpha = alpha && alpSrcx;
+int pixelStep = alpha ? 4 : 3;
+uint32_t *dest32 = (uint32_t*)dest;
+const int32_t **lumSrc  = (const int32_t**)lumSrcx;
+const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+static const float float_mult = 1.0f / 65535.0f;
+uint32_t a = av_float2int(1.0f);
+
+for (i = 0; i < dstW; i++) {
+int j;
+int Y = -0x4000;
+int U = -(128 << 23);
+int V = -(128 << 23);
+int R, G, B, A;
+
+for (j = 0; j < lumFilterSize; j++)
+Y += lumSrc[j][i] * (unsigned)lumFilter[j];
+
+for (j = 0; j < chrFilterSize; j++) {
+U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+}
+
+Y >>= 14;
+Y += 0x1;
+U >>= 14;
+V >>= 14;
+
+if (hasAlpha) {
+A = -0x4000;
+
+for (j = 0; j < lumFilterSize; j++)
+A += alpSrc[j][i] * (unsigned)lumFilter[j];
+
+A >>= 1;
+A += 0x20002000;
+a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 
14));
+}
+
+Y -= c->yuv2rgb_y_offset;
+Y *= c->yuv2rgb_y_coeff;
+Y += (1 << 13) - (1 << 29);
+R = V * c->yuv2rgb_v2r_coeff;
+G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+B =U * c->yuv2rgb_u2b_coeff;
+
+R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16);
+G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16);
+B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16);
+
+dest32[0] = av_float2int(float_mult * (float)R);
+dest32[1] = av_float2int(float_mult * (float)G);
+dest32[2] = av_float2int(float_mult * (float)B);
+if (alpha)
+dest32[3] = a;
+
+dest32 += pixelStep;
+}
+if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+dest32 = (uint32_t*)dest;
+for (i = 0; i < dstW; i++) {
+dest32[0] = av_bswap32(dest32[0]);
+dest32[1] = av_bswap32(dest32[1]);
+dest32[2] = av_bswap32(dest32[2]);
+if (alpha)
+dest32[3] = av_bswap32(dest32[3]);
+
+dest32 += pixelStep;
+}
+}
+
+}
+
 static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
 const int16_t *ubuf[2], const int16_t *vbuf[2],
@@ -2983,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 }
 break;
 
+case AV_PIX_FMT_RGBF32LE:
+case AV_PIX_FMT_RGBF32BE:
+case AV_PIX_FMT_RGBAF32LE:
+case AV_PIX_FMT_RGBAF32BE:
+*yuv2packedX = yuv2rgbaf32_full_X_c;
+break;
 case AV_PIX_FMT_RGB24:

[FFmpeg-devel] [PATCH v4 3/4] avfilter/vf_transpose: add support for packed rgb float formats

2022-11-21 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_transpose.c | 44 ++
 1 file changed, 44 insertions(+)

diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 469e66729f..1023d6fe82 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t 
src_linesize,
 transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8);
 }
 
+static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize,
+int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 12) {
+for (x = 0; x < w; x++) {
+*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + 
x*src_linesize));
+*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + 
x*src_linesize));
+*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize,
+   uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
+
+static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize,
+ uint8_t *dst, ptrdiff_t dst_linesize,
+ int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 16) {
+for (x = 0; x < w; x++) {
+*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + 
x*src_linesize));
+*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
 static int config_props_output(AVFilterLink *outlink)
 {
 AVFilterContext *ctx = outlink->src;
@@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink)
 v->transpose_8x8   = transpose_8x8_48_c; break;
 case 8: v->transpose_block = transpose_block_64_c;
 v->transpose_8x8   = transpose_8x8_64_c; break;
+case 12: v->transpose_block = transpose_block_96_c;
+ v->transpose_8x8   = transpose_8x8_96_c; break;
+case 16: v->transpose_block = transpose_block_128_c;
+ v->transpose_8x8   = transpose_8x8_128_c; break;
 }
 }
 
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v4 2/4] avfilter/vf_hflip: add support for packed rgb float formats

2022-11-21 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_hflip_init.h | 25 +
 1 file changed, 25 insertions(+)

diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h
index d0319f463d..31173f73fc 100644
--- a/libavfilter/vf_hflip_init.h
+++ b/libavfilter/vf_hflip_init.h
@@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t 
*ddst, int w)
 dst[j] = src[-j];
 }
 
+static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint32_t *in = (const uint32_t *)ssrc;
+uint32_t *out = (uint32_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 3, in -= 3) {
+out[0] = in[0];
+out[1] = in[1];
+out[2] = in[2];
+}
+}
+
+static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint64_t *in = (const uint64_t *)ssrc;
+uint64_t *out = (uint64_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 2, in -= 2) {
+out[0] = in[0];
+out[1] = in[1];
+}
+}
+
 static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes)
 {
 for (int i = 0; i < nb_planes; i++) {
@@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int 
step[4], int nb_planes)
 case 4: s->flip_line[i] = hflip_dword_c; break;
 case 6: s->flip_line[i] = hflip_b48_c;   break;
 case 8: s->flip_line[i] = hflip_qword_c; break;
+case 12: s->flip_line[i] = hflip_b96_c; break;
+case 16: s->flip_line[i] = hflip_b128_c; break;
 default:
 return AVERROR_BUG;
 }
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v4 1/4] swscale/input: add rgbaf32 input support

2022-11-21 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c | 120 +
 libswscale/utils.c |   6 +++
 2 files changed, 126 insertions(+)

diff --git a/libswscale/input.c b/libswscale/input.c
index d5676062a2..a305be5ac2 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1284,6 +1284,96 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, 
const uint8_t *_src, cons
 rgbaf16_funcs_endian(le, 0)
 rgbaf16_funcs_endian(be, 1)
 
+#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): 
av_int2float(AV_RL32(&src)))
+
+static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+const float *src, int width,
+int32_t *rgb2yuv, int comp)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, int32_t *rgb2yuv, 
int comp)
+{
+int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, void *opq)
+{
+int i;
+for (i=0; isrcFormat;
@@ -1663,6 +1753,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->chrToYV12 = rgbaf16leToUV_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->chrToYV12 = rgbf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->chrToYV12 = rgbaf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->chrToYV12 = rgbf32leToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->chrToYV12 = rgbaf32leToUV_c;
+break;
 }
 }
 
@@ -1973,6 +2075,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->lumToYV12 = rgbaf16leToY_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->lumToYV12 = rgbf32beToY_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->lumToYV12 = rgbaf32beToY_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->lumToYV12 = rgbf32leToY_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->lumToYV12 = rgbaf32leToY_c;
+break;
 }
 if (c->needAlpha) {
 if (is16BPS(srcFormat) || isNBPS(srcFormat)) {
@@ -1998,6 +2112,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->alpToYV12 = rgbaf16leToA_c;
 break;
+case AV_PIX_FMT_RGBAF32BE:
+c->alpToYV12 = rgbaf32beToA_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->alpToYV12 = rgbaf32leToA_c;
+break;
 case AV_PIX_FMT_YA8:
 c->alpToYV12 = uyvyToY_c;
 break;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 85640a143f..2c520f68d1 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -266,6 +266,10 @@ static const FormatEntry format_entries[] = {
 [AV_PIX_FMT_VUYX]= { 1, 1 },
 [AV_PIX_FMT_RGBAF16BE]   = { 1, 0 },
 [AV_PIX_FMT_RGBAF16LE]   = { 1, 0 },
+[AV_PIX_FMT_RGBF32BE]= { 1, 0 },
+[AV_PIX_FMT_RGBF32LE]= { 1, 0 },
+[AV_PIX_FMT_RGBAF32BE]   = { 1, 0 },
+[AV_PIX_FMT_RGBAF32LE]   = { 1, 0 },
 [AV_PIX_FMT_XV30LE]  = { 1, 1 },
 [AV_PIX_FMT_XV36LE]  = { 1, 1 },
 };
@@ -1572,6 +1576,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter 
*srcFilter,
 srcFormat != AV_PIX_FMT_GBRAP16BE  && srcFormat != 
AV_PIX_FMT_GBRAP16LE &&
 srcFormat != AV_PIX_FMT_GBRPF32BE  && srcFormat != 
AV_PIX_FMT_GBRPF32LE &&
 srcFormat != AV_PIX_FMT_GBRAPF32BE && srcFormat != 
AV_PIX_FMT_GBRAPF32LE &&
+srcFormat != AV_PIX_FMT_RGBF32BE   && srcFormat != AV_PIX_FMT_RGBF32LE

[FFmpeg-devel] [PATCH v4 0/4] swscale: rgbaf32 input/output support

2022-11-21 Thread mindmark

From: Mark Reid 

This patch series adds swscale input/output support for the packed rgb float 
formats.
A few of the filters also needed to support the larger 96/128-bit packed pixel 
sizes.

I also plan to eventually add lossless unscaled conversions between the planar 
and packed formats.

changes since v3
* removed half uv path implementation
changes since v2
* add bias to rgbaf32 output to improve non overflowing range
changes since v1
* output correct alpha if src doesn't have alpha

Mark Reid (4):
  swscale/input: add rgbaf32 input support
  avfilter/vf_hflip: add support for packed rgb float formats
  avfilter/vf_transpose: add support for packed rgb float formats
  swscale/output: add rgbaf32 output support

 libavfilter/vf_hflip_init.h  |  25 +
 libavfilter/vf_transpose.c   |  44 +
 libswscale/input.c   | 120 +++
 libswscale/output.c  |  92 +
 libswscale/swscale_unscaled.c|   4 +-
 libswscale/tests/floatimg_cmp.c  |   4 +-
 libswscale/utils.c   |  14 ++-
 libswscale/yuv2rgb.c |   2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |   1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |   1 +
 tests/ref/fate/filter-pixfmts-copy   |   4 +
 tests/ref/fate/filter-pixfmts-crop   |   4 +
 tests/ref/fate/filter-pixfmts-field  |   4 +
 tests/ref/fate/filter-pixfmts-fieldorder |   4 +
 tests/ref/fate/filter-pixfmts-hflip  |   4 +
 tests/ref/fate/filter-pixfmts-il |   4 +
 tests/ref/fate/filter-pixfmts-null   |   4 +
 tests/ref/fate/filter-pixfmts-scale  |   4 +
 tests/ref/fate/filter-pixfmts-transpose  |   4 +
 tests/ref/fate/filter-pixfmts-vflip  |   4 +
 tests/ref/fate/sws-floatimg-cmp  |  16 +++
 23 files changed, 361 insertions(+), 4 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

--
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v3 4/4] swscale/output: add rgbaf32 output support

2022-11-02 Thread mindmark

From: Mark Reid 

---
 libswscale/output.c  | 92 
 libswscale/swscale_unscaled.c|  4 +-
 libswscale/tests/floatimg_cmp.c  |  4 +-
 libswscale/utils.c   | 16 +++--
 libswscale/yuv2rgb.c |  2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |  1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 tests/ref/fate/sws-floatimg-cmp  | 16 +
 20 files changed, 170 insertions(+), 8 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..1d86a244f9 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2471,6 +2471,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 
+static void
+yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+const int16_t **lumSrcx, int lumFilterSize,
+const int16_t *chrFilter, const int16_t **chrUSrcx,
+const int16_t **chrVSrcx, int chrFilterSize,
+const int16_t **alpSrcx, uint8_t *dest,
+int dstW, int y)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+int i;
+int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
+int hasAlpha = alpha && alpSrcx;
+int pixelStep = alpha ? 4 : 3;
+uint32_t *dest32 = (uint32_t*)dest;
+const int32_t **lumSrc  = (const int32_t**)lumSrcx;
+const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+static const float float_mult = 1.0f / 65535.0f;
+uint32_t a = av_float2int(1.0f);
+
+for (i = 0; i < dstW; i++) {
+int j;
+int Y = -0x4000;
+int U = -(128 << 23);
+int V = -(128 << 23);
+int R, G, B, A;
+
+for (j = 0; j < lumFilterSize; j++)
+Y += lumSrc[j][i] * (unsigned)lumFilter[j];
+
+for (j = 0; j < chrFilterSize; j++) {
+U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+}
+
+Y >>= 14;
+Y += 0x1;
+U >>= 14;
+V >>= 14;
+
+if (hasAlpha) {
+A = -0x4000;
+
+for (j = 0; j < lumFilterSize; j++)
+A += alpSrc[j][i] * (unsigned)lumFilter[j];
+
+A >>= 1;
+A += 0x20002000;
+a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 
14));
+}
+
+Y -= c->yuv2rgb_y_offset;
+Y *= c->yuv2rgb_y_coeff;
+Y += (1 << 13) - (1 << 29);
+R = V * c->yuv2rgb_v2r_coeff;
+G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+B =U * c->yuv2rgb_u2b_coeff;
+
+R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16);
+G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16);
+B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16);
+
+dest32[0] = av_float2int(float_mult * (float)R);
+dest32[1] = av_float2int(float_mult * (float)G);
+dest32[2] = av_float2int(float_mult * (float)B);
+if (alpha)
+dest32[3] = a;
+
+dest32 += pixelStep;
+}
+if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+dest32 = (uint32_t*)dest;
+for (i = 0; i < dstW; i++) {
+dest32[0] = av_bswap32(dest32[0]);
+dest32[1] = av_bswap32(dest32[1]);
+dest32[2] = av_bswap32(dest32[2]);
+if (alpha)
+dest32[3] = av_bswap32(dest32[3]);
+
+dest32 += pixelStep;
+}
+}
+
+}
+
 static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
 const int16_t *ubuf[2], const int16_t *vbuf[2],
@@ -2983,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 }
 break;
 
+case AV_PIX_FMT_RGBF32LE:
+case AV_PIX_FMT_RGBF32BE:
+case AV_PIX_FMT_RGBAF32LE:
+case AV_PIX_FMT_RGBAF32BE:
+*yuv2packedX = yuv2rgbaf32_full_X_c;
+break;
 case AV_PIX_FMT_RGB24:

[FFmpeg-devel] [PATCH v3 3/4] avfilter/vf_transpose: add support for packed rgb float formats

2022-11-02 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_transpose.c | 44 ++
 1 file changed, 44 insertions(+)

diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 469e66729f..1023d6fe82 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t 
src_linesize,
 transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8);
 }
 
+static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize,
+int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 12) {
+for (x = 0; x < w; x++) {
+*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + 
x*src_linesize));
+*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + 
x*src_linesize));
+*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize,
+   uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
+
+static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize,
+ uint8_t *dst, ptrdiff_t dst_linesize,
+ int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 16) {
+for (x = 0; x < w; x++) {
+*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + 
x*src_linesize));
+*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
 static int config_props_output(AVFilterLink *outlink)
 {
 AVFilterContext *ctx = outlink->src;
@@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink)
 v->transpose_8x8   = transpose_8x8_48_c; break;
 case 8: v->transpose_block = transpose_block_64_c;
 v->transpose_8x8   = transpose_8x8_64_c; break;
+case 12: v->transpose_block = transpose_block_96_c;
+ v->transpose_8x8   = transpose_8x8_96_c; break;
+case 16: v->transpose_block = transpose_block_128_c;
+ v->transpose_8x8   = transpose_8x8_128_c; break;
 }
 }
 
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v3 2/4] avfilter/vf_hflip: add support for packed rgb float formats

2022-11-02 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_hflip_init.h | 25 +
 1 file changed, 25 insertions(+)

diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h
index d0319f463d..31173f73fc 100644
--- a/libavfilter/vf_hflip_init.h
+++ b/libavfilter/vf_hflip_init.h
@@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t 
*ddst, int w)
 dst[j] = src[-j];
 }
 
+static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint32_t *in = (const uint32_t *)ssrc;
+uint32_t *out = (uint32_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 3, in -= 3) {
+out[0] = in[0];
+out[1] = in[1];
+out[2] = in[2];
+}
+}
+
+static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint64_t *in = (const uint64_t *)ssrc;
+uint64_t *out = (uint64_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 2, in -= 2) {
+out[0] = in[0];
+out[1] = in[1];
+}
+}
+
 static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes)
 {
 for (int i = 0; i < nb_planes; i++) {
@@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int 
step[4], int nb_planes)
 case 4: s->flip_line[i] = hflip_dword_c; break;
 case 6: s->flip_line[i] = hflip_b48_c;   break;
 case 8: s->flip_line[i] = hflip_qword_c; break;
+case 12: s->flip_line[i] = hflip_b96_c; break;
+case 16: s->flip_line[i] = hflip_b128_c; break;
 default:
 return AVERROR_BUG;
 }
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v3 1/4] swscale/input: add rgbaf32 input support

2022-11-02 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c | 172 +
 libswscale/utils.c |   4 ++
 2 files changed, 176 insertions(+)

diff --git a/libswscale/input.c b/libswscale/input.c
index 7ff7bfaa01..4683284b0b 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, 
const uint8_t *_src, cons
 rgbaf16_funcs_endian(le, 0)
 rgbaf16_funcs_endian(be, 1)
 
+#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): 
av_int2float(AV_RL32(&src)))
+
+static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+ const float *src, int 
width,
+ int32_t *rgb2yuv, int 
comp)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), 0.0f, 
65535.0f))) >> 1;
+int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), 0.0f, 
65535.0f))) >> 1;
+int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), 0.0f, 
65535.0f))) >> 1;
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+const float *src, int width,
+int32_t *rgb2yuv, int comp)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, int32_t *rgb2yuv, 
int comp)
+{
+int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, void *opq)
+{
+int i;
+for (i=0; isrcFormat;
@@ -1570,6 +1700,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->chrToYV12 = rgbaf16leToUV_half_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->chrToYV12 = rgbf32beToUV_half_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->chrToYV12 = rgbaf32beToUV_half_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->chrToYV12 = rgbf32leToUV_half_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->chrToYV12 = rgbaf32leToUV_half_c;
+break;
 }
 } else {
 switch (srcFormat) {
@@ -1663,6 +1805,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->chrToYV12 = rgbaf16leToUV_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->chrToYV12 = rgbf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->chrToYV12 = rgbaf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->chrToYV12 = rgbf32leToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->chrToYV12 = rgbaf32leToUV_c;
+break;
 }
 }
 
@@ -1973,6 +2127,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->lumToYV12 = rgbaf16leToY_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+

[FFmpeg-devel] [PATCH v3 0/4] swscale rgbaf32 input/output support

2022-11-02 Thread mindmark

From: Mark Reid 

This patch series adds swscale input/output support for the packed rgb float 
formats.
A few of the filters also needed to support the larger 96/128-bit packed pixel
sizes.

I also plan to eventually add lossless unscaled conversions between the planar
and packed formats.

changes since v2
* add bias to rgbaf32 output to improve non overflowing range
changes since v1
* output correct alpha if src doesn't have alpha

Mark Reid (4):
  swscale/input: add rgbaf32 input support
  avfilter/vf_hflip: add support for packed rgb float formats
  avfilter/vf_transpose: add support for packed rgb float formats
  swscale/output: add rgbaf32 output support

 libavfilter/vf_hflip_init.h  |  25 
 libavfilter/vf_transpose.c   |  44 ++
 libswscale/input.c   | 172 +++
 libswscale/output.c  |  92 
 libswscale/swscale_unscaled.c|   4 +-
 libswscale/tests/floatimg_cmp.c  |   4 +-
 libswscale/utils.c   |  12 +-
 libswscale/yuv2rgb.c |   2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |   1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |   1 +
 tests/ref/fate/filter-pixfmts-copy   |   4 +
 tests/ref/fate/filter-pixfmts-crop   |   4 +
 tests/ref/fate/filter-pixfmts-field  |   4 +
 tests/ref/fate/filter-pixfmts-fieldorder |   4 +
 tests/ref/fate/filter-pixfmts-hflip  |   4 +
 tests/ref/fate/filter-pixfmts-il |   4 +
 tests/ref/fate/filter-pixfmts-null   |   4 +
 tests/ref/fate/filter-pixfmts-scale  |   4 +
 tests/ref/fate/filter-pixfmts-transpose  |   4 +
 tests/ref/fate/filter-pixfmts-vflip  |   4 +
 tests/ref/fate/sws-floatimg-cmp  |  16 +++
 23 files changed, 411 insertions(+), 4 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

--
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2 4/4] swscale/output: add rgbaf32 output support

2022-10-30 Thread mindmark

From: Mark Reid 

---
 libswscale/output.c  | 92 
 libswscale/swscale_unscaled.c|  4 +-
 libswscale/tests/floatimg_cmp.c  |  4 +-
 libswscale/utils.c   | 16 +++--
 libswscale/yuv2rgb.c |  2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |  1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 tests/ref/fate/sws-floatimg-cmp  | 16 +
 20 files changed, 170 insertions(+), 8 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

diff --git a/libswscale/output.c b/libswscale/output.c
index 0e1c1225a0..e2ec9cbdf5 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2474,6 +2474,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 
+static void
+yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+const int16_t **lumSrcx, int lumFilterSize,
+const int16_t *chrFilter, const int16_t **chrUSrcx,
+const int16_t **chrVSrcx, int chrFilterSize,
+const int16_t **alpSrcx, uint8_t *dest,
+int dstW, int y)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+int i;
+int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
+int hasAlpha = alpha && alpSrcx;
+int pixelStep = alpha ? 4 : 3;
+uint32_t *dest32 = (uint32_t*)dest;
+const int32_t **lumSrc  = (const int32_t**)lumSrcx;
+const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+static const float float_mult = 1.0f / 65535.0f;
+uint32_t a = av_float2int(1.0f);
+
+for (i = 0; i < dstW; i++) {
+int j;
+int Y = -0x4000;
+int U = -(128 << 23);
+int V = -(128 << 23);
+int R, G, B, A;
+
+for (j = 0; j < lumFilterSize; j++)
+Y += lumSrc[j][i] * (unsigned)lumFilter[j];
+
+for (j = 0; j < chrFilterSize; j++) {
+U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+}
+
+Y >>= 14;
+Y += 0x1;
+U >>= 14;
+V >>= 14;
+
+if (hasAlpha) {
+A = -0x4000;
+
+for (j = 0; j < lumFilterSize; j++)
+A += alpSrc[j][i] * (unsigned)lumFilter[j];
+
+A >>= 1;
+A += 0x20002000;
+a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 
14));
+}
+
+Y -= c->yuv2rgb_y_offset;
+Y *= c->yuv2rgb_y_coeff;
+Y += 1 << 13;
+R = V * c->yuv2rgb_v2r_coeff;
+G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+B =U * c->yuv2rgb_u2b_coeff;
+
+R = av_clip_uintp2(Y + R, 30);
+G = av_clip_uintp2(Y + G, 30);
+B = av_clip_uintp2(Y + B, 30);
+
+dest32[0] = av_float2int(float_mult * (float)(R >> 14));
+dest32[1] = av_float2int(float_mult * (float)(G >> 14));
+dest32[2] = av_float2int(float_mult * (float)(B >> 14));
+if (alpha)
+dest32[3] = a;
+
+dest32 += pixelStep;
+}
+if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+dest32 = (uint32_t*)dest;
+for (i = 0; i < dstW; i++) {
+dest32[0] = av_bswap32(dest32[0]);
+dest32[1] = av_bswap32(dest32[1]);
+dest32[2] = av_bswap32(dest32[2]);
+if (alpha)
+dest32[3] = av_bswap32(dest32[3]);
+
+dest32 += pixelStep;
+}
+}
+
+}
+
 static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
 const int16_t *ubuf[2], const int16_t *vbuf[2],
@@ -2986,6 +3072,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 }
 break;
 
+case AV_PIX_FMT_RGBF32LE:
+case AV_PIX_FMT_RGBF32BE:
+case AV_PIX_FMT_RGBAF32LE:
+case AV_PIX_FMT_RGBAF32BE:
+*yuv2packedX = yuv2rgbaf32_full_X_c;
+break;
 case AV_PIX_FMT_RGB24:
 *yuv2packedX = yuv2rgb24_full_X_c;

[FFmpeg-devel] [PATCH v2 3/4] avfilter/vf_transpose: add support for packed rgb float formats

2022-10-30 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_transpose.c | 44 ++
 1 file changed, 44 insertions(+)

diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 469e66729f..1023d6fe82 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t 
src_linesize,
 transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8);
 }
 
+static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize,
+int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 12) {
+for (x = 0; x < w; x++) {
+*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + 
x*src_linesize));
+*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + 
x*src_linesize));
+*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize,
+   uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
+
+static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize,
+ uint8_t *dst, ptrdiff_t dst_linesize,
+ int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 16) {
+for (x = 0; x < w; x++) {
+*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + 
x*src_linesize));
+*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
 static int config_props_output(AVFilterLink *outlink)
 {
 AVFilterContext *ctx = outlink->src;
@@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink)
 v->transpose_8x8   = transpose_8x8_48_c; break;
 case 8: v->transpose_block = transpose_block_64_c;
 v->transpose_8x8   = transpose_8x8_64_c; break;
+case 12: v->transpose_block = transpose_block_96_c;
+ v->transpose_8x8   = transpose_8x8_96_c; break;
+case 16: v->transpose_block = transpose_block_128_c;
+ v->transpose_8x8   = transpose_8x8_128_c; break;
 }
 }
 
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2 2/4] avfilter/vf_hflip: add support for packed rgb float formats

2022-10-30 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_hflip_init.h | 25 +
 1 file changed, 25 insertions(+)

diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h
index d0319f463d..31173f73fc 100644
--- a/libavfilter/vf_hflip_init.h
+++ b/libavfilter/vf_hflip_init.h
@@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t 
*ddst, int w)
 dst[j] = src[-j];
 }
 
+static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint32_t *in = (const uint32_t *)ssrc;
+uint32_t *out = (uint32_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 3, in -= 3) {
+out[0] = in[0];
+out[1] = in[1];
+out[2] = in[2];
+}
+}
+
+static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint64_t *in = (const uint64_t *)ssrc;
+uint64_t *out = (uint64_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 2, in -= 2) {
+out[0] = in[0];
+out[1] = in[1];
+}
+}
+
 static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes)
 {
 for (int i = 0; i < nb_planes; i++) {
@@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int 
step[4], int nb_planes)
 case 4: s->flip_line[i] = hflip_dword_c; break;
 case 6: s->flip_line[i] = hflip_b48_c;   break;
 case 8: s->flip_line[i] = hflip_qword_c; break;
+case 12: s->flip_line[i] = hflip_b96_c; break;
+case 16: s->flip_line[i] = hflip_b128_c; break;
 default:
 return AVERROR_BUG;
 }
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2 1/4] swscale/input: add rgbaf32 input support

2022-10-30 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c | 172 +
 libswscale/utils.c |   4 ++
 2 files changed, 176 insertions(+)

diff --git a/libswscale/input.c b/libswscale/input.c
index 7ff7bfaa01..4683284b0b 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, 
const uint8_t *_src, cons
 rgbaf16_funcs_endian(le, 0)
 rgbaf16_funcs_endian(be, 1)
 
+#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): 
av_int2float(AV_RL32(&src)))
+
+static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+ const float *src, int 
width,
+ int32_t *rgb2yuv, int 
comp)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), 0.0f, 
65535.0f))) >> 1;
+int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), 0.0f, 
65535.0f))) >> 1;
+int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), 0.0f, 
65535.0f))) >> 1;
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+const float *src, int width,
+int32_t *rgb2yuv, int comp)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, int32_t *rgb2yuv, 
int comp)
+{
+int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, void *opq)
+{
+int i;
+for (i=0; isrcFormat;
@@ -1570,6 +1700,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->chrToYV12 = rgbaf16leToUV_half_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->chrToYV12 = rgbf32beToUV_half_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->chrToYV12 = rgbaf32beToUV_half_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->chrToYV12 = rgbf32leToUV_half_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->chrToYV12 = rgbaf32leToUV_half_c;
+break;
 }
 } else {
 switch (srcFormat) {
@@ -1663,6 +1805,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->chrToYV12 = rgbaf16leToUV_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->chrToYV12 = rgbf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->chrToYV12 = rgbaf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->chrToYV12 = rgbf32leToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->chrToYV12 = rgbaf32leToUV_c;
+break;
 }
 }
 
@@ -1973,6 +2127,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->lumToYV12 = rgbaf16leToY_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+

[FFmpeg-devel] [PATCH v2 0/4] swscale rgbaf32 input/output support

2022-10-30 Thread mindmark

From: Mark Reid 

This patch series adds swscale input/output support for the packed rgb float 
formats.
A few of the filters also needed to support the larger 96/128-bit packed pixel
sizes.

I also plan to eventually add lossless unscaled conversions between the planar
and packed formats.

changes since v1
* output correct alpha if src doesn't have alpha

Mark Reid (4):
  swscale/input: add rgbaf32 input support
  avfilter/vf_hflip: add support for packed rgb float formats
  avfilter/vf_transpose: add support for packed rgb float formats
  swscale/output: add rgbaf32 output support

 libavfilter/vf_hflip_init.h  |  25 
 libavfilter/vf_transpose.c   |  44 ++
 libswscale/input.c   | 172 +++
 libswscale/output.c  |  92 
 libswscale/swscale_unscaled.c|   4 +-
 libswscale/tests/floatimg_cmp.c  |   4 +-
 libswscale/utils.c   |  12 +-
 libswscale/yuv2rgb.c |   2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |   1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |   1 +
 tests/ref/fate/filter-pixfmts-copy   |   4 +
 tests/ref/fate/filter-pixfmts-crop   |   4 +
 tests/ref/fate/filter-pixfmts-field  |   4 +
 tests/ref/fate/filter-pixfmts-fieldorder |   4 +
 tests/ref/fate/filter-pixfmts-hflip  |   4 +
 tests/ref/fate/filter-pixfmts-il |   4 +
 tests/ref/fate/filter-pixfmts-null   |   4 +
 tests/ref/fate/filter-pixfmts-scale  |   4 +
 tests/ref/fate/filter-pixfmts-transpose  |   4 +
 tests/ref/fate/filter-pixfmts-vflip  |   4 +
 tests/ref/fate/sws-floatimg-cmp  |  16 +++
 23 files changed, 411 insertions(+), 4 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

--
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2] avcodec/tiff: add read support for compressed rgb floating point formats

2022-10-01 Thread mindmark

From: Mark Reid 

Floating point uses a slightly different predictor technique, described here:
http://chriscox.org/TIFFTN3d1.pdf

Here is a link to the test files, if someone could add them to FATE for me:
https://www.dropbox.com/s/fg59h2os4gb4wug/tiff_fate_samples.zip


---
 libavcodec/tiff.c  | 68 ++
 tests/fate/image.mak   | 20 ++-
 tests/ref/fate/tiff-lzw-rgbaf32le  |  6 ++
 tests/ref/fate/tiff-lzw-rgbf32le   |  6 ++
 tests/ref/fate/tiff-uncompressed-rgbaf32le |  6 ++
 tests/ref/fate/tiff-uncompressed-rgbf32le  |  6 ++
 tests/ref/fate/tiff-zip-rgbaf32le  |  6 ++
 tests/ref/fate/tiff-zip-rgbf32le   |  6 ++
 8 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 tests/ref/fate/tiff-lzw-rgbaf32le
 create mode 100644 tests/ref/fate/tiff-lzw-rgbf32le
 create mode 100644 tests/ref/fate/tiff-uncompressed-rgbaf32le
 create mode 100644 tests/ref/fate/tiff-uncompressed-rgbf32le
 create mode 100644 tests/ref/fate/tiff-zip-rgbaf32le
 create mode 100644 tests/ref/fate/tiff-zip-rgbf32le

diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 3a610ada85..ce31a40e37 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -2249,6 +2249,74 @@ again:
 }
 }

+/* Floating point predictor
+   TIFF Technical Note 3 http://chriscox.org/TIFFTN3d1.pdf */
+if (s->predictor == 3) {
+int channels = s->bppcount;
+int group_size;
+uint8_t *tmpbuf;
+int bpc;
+
+dst   = five_planes ? five_planes : p->data[plane];
+soff  = s->bpp >> 3;
+if (s->planar) {
+soff  = FFMAX(soff / s->bppcount, 1);
+channels = 1;
+}
+ssize = s->width * soff;
+bpc = FFMAX(soff / s->bppcount, 1); /* Bytes per component */
+group_size = s->width * channels;
+
+tmpbuf = av_malloc(ssize);
+if (!tmpbuf)
+return AVERROR(ENOMEM);
+
+if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32LE ||
+s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32LE) {
+for (i = 0; i < decoded_height; i++) {
+/* Copy first sample byte for each channel */
+for (j = 0; j < channels; j++)
+tmpbuf[j] = dst[j];
+
+/* Decode horizontal differences */
+for (j = channels; j < ssize; j++)
+tmpbuf[j] = dst[j] + tmpbuf[j-channels];
+
+/* Combine shuffled bytes from their separate groups. Each
+   byte of every floating point value in a row of pixels is
+   split and combined into separate groups. A group of all
+   the sign/exponents bytes in the row and groups for each
+   of the upper, mid, and lower mantissa bytes in the row. 
*/
+for (j = 0; j < group_size; j++) {
+for (int k = 0; k < bpc; k++) {
+dst[bpc * j + k] = tmpbuf[(bpc - k - 1) * 
group_size + j];
+}
+}
+dst += stride;
+}
+} else if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32BE ||
+   s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32BE) {
+/* Same as LE only the shuffle at the end is reversed */
+for (i = 0; i < decoded_height; i++) {
+for (j = 0; j < channels; j++)
+tmpbuf[j] = dst[j];
+
+for (j = channels; j < ssize; j++)
+tmpbuf[j] = dst[j] + tmpbuf[j-channels];
+
+for (j = 0; j < group_size; j++) {
+for (int k = 0; k < bpc; k++) {
+dst[bpc * j + k] = tmpbuf[k * group_size + j];
+}
+}
+dst += stride;
+}
+} else {
+av_log(s->avctx, AV_LOG_ERROR, "unsupported floating point 
pixel format\n");
+}
+av_free(tmpbuf);
+}
+
 if (s->photometric == TIFF_PHOTOMETRIC_WHITE_IS_ZERO) {
 int c = (s->avctx->pix_fmt == AV_PIX_FMT_PAL8 ? (1data[plane];
diff --git a/tests/fate/image.mak b/tests/fate/image.mak
index 03e794dc48..167c8ccf2c 100644
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak
@@ -501,7 +501,25 @@ fate-tiff-fax-g3: CMD = framecrc -i 
$(TARGET_SAMPLES)/CCITT_fax/G31D.TIF
 FATE_TIFF += fate-tiff-fax-g3s
 fate-tiff-fax-g3s: CMD = framecrc -i $(TARGET_SAMPLES)/CCITT_fax/G31DS.TIF

-FATE_TIFF-$(call DEMDEC, IMAGE2, TIFF) += $(FATE_TIFF)
+FATE_TIFF += fate-tiff-uncompressed-rgbf32le
+fate-tiff-uncompressed-rgbf32le: CMD = framecrc -i 
$(TARGET_SAMPLES)/tiff/uncompressed_rgbf32le.tif
+
+FATE_

[FFmpeg-devel] [PATCH] avcodec/tiff: add support for decoding compressed rgb floating point formats

2022-10-01 Thread mindmark

From: Mark Reid 

Floating point uses a slightly different predictor technique, described here:
http://chriscox.org/TIFFTN3d1.pdf

Here is a link to the test files, if someone could add them to FATE for me:
https://www.dropbox.com/s/fg59h2os4gb4wug/tiff_fate_samples.zip


---
 libavcodec/tiff.c  | 66 +-
 tests/fate/image.mak   | 18 ++
 tests/ref/fate/tiff-lzw-rgbaf32le  |  6 ++
 tests/ref/fate/tiff-lzw-rgbf32le   |  6 ++
 tests/ref/fate/tiff-uncompressed-rgbaf32le |  6 ++
 tests/ref/fate/tiff-uncompressed-rgbf32le  |  6 ++
 tests/ref/fate/tiff-zip-rgbaf32le  |  6 ++
 tests/ref/fate/tiff-zip-rgbf32le   |  6 ++
 8 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 tests/ref/fate/tiff-lzw-rgbaf32le
 create mode 100644 tests/ref/fate/tiff-lzw-rgbf32le
 create mode 100644 tests/ref/fate/tiff-uncompressed-rgbaf32le
 create mode 100644 tests/ref/fate/tiff-uncompressed-rgbf32le
 create mode 100644 tests/ref/fate/tiff-zip-rgbaf32le
 create mode 100644 tests/ref/fate/tiff-zip-rgbf32le

diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 3a610ada85..c1d07f8c3e 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -1913,7 +1913,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 TiffContext *const s = avctx->priv_data;
 unsigned off, last_off = 0;
 int le, ret, plane, planes;
-int i, j, entries, stride;
+int i, j, k, entries, stride;
 unsigned soff, ssize;
 uint8_t *dst;
 GetByteContext stripsizes;
@@ -2249,6 +2249,70 @@ again:
 }
 }

+/* Floating point predictor
+TIFF Technical Note 3 http://chriscox.org/TIFFTN3d1.pdf */
+if (s->predictor == 3) {
+int channels = s->bppcount;
+int lane_offset;
+uint8_t *tmpbuf;
+int bpc;
+
+dst   = five_planes ? five_planes : p->data[plane];
+soff  = s->bpp >> 3;
+if (s->planar) {
+soff  = FFMAX(soff / s->bppcount, 1);
+channels = 1;
+}
+ssize = s->width * soff;
+bpc = FFMAX(soff / s->bppcount, 1); /* bytes per component */
+lane_offset = s->width * channels;
+
+tmpbuf = (uint8_t*)av_malloc(ssize);
+if (!tmpbuf)
+return AVERROR(ENOMEM);
+
+if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32LE ||
+s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32LE) {
+for (i = 0; i < decoded_height; i++) {
+/* copy first sample byte for each channel */
+for (j = 0; j < channels; j++)
+tmpbuf[j] = dst[j];
+
+/* decode horizontal differences */
+for (j = channels; j < ssize; j++)
+tmpbuf[j] = dst[j] + tmpbuf[j-channels];
+
+/* combine shuffled bytes from their sepearate lanes */
+for (j = 0; j < lane_offset; j++) {
+for (k = 0; k < bpc; k++) {
+dst[bpc * j + k] = tmpbuf[(bpc - k - 1) * 
lane_offset + j];
+}
+}
+dst += stride;
+}
+} else if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32BE ||
+   s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32BE) {
+/* same as LE only the shuffle at the end is reversed */
+for (i = 0; i < decoded_height; i++) {
+for (j = 0; j < channels; j++)
+tmpbuf[j] = dst[j];
+
+for (j = channels; j < ssize; j++)
+tmpbuf[j] = dst[j] + tmpbuf[j-channels];
+
+for (j = 0; j < lane_offset; j++) {
+for (k = 0; k < bpc; k++) {
+dst[bpc * j + k] = tmpbuf[k * lane_offset + j];
+}
+}
+dst += stride;
+}
+} else {
+av_log(s->avctx, AV_LOG_ERROR, "unsupported floating point 
pixel format\n");
+}
+av_free(tmpbuf);
+}
+
 if (s->photometric == TIFF_PHOTOMETRIC_WHITE_IS_ZERO) {
 int c = (s->avctx->pix_fmt == AV_PIX_FMT_PAL8 ? (1data[plane];
diff --git a/tests/fate/image.mak b/tests/fate/image.mak
index 03e794dc48..971531520d 100644
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak
@@ -501,6 +501,24 @@ fate-tiff-fax-g3: CMD = framecrc -i 
$(TARGET_SAMPLES)/CCITT_fax/G31D.TIF
 FATE_TIFF += fate-tiff-fax-g3s
 fate-tiff-fax-g3s: CMD = framecrc -i $(TARGET_SAMPLES)/CCITT_fax/G31DS.TIF

+FATE_TIFF += fate-tiff-uncompressed-rgbf32le
+fate-tiff-uncompressed-rgbf32le: CMD = framecrc -i 
$(TARGET_SAMPLES)/tiff/uncompressed_rgbf32le.tif
+
+FATE_TIFF += fate-tiff-uncompressed-rgbaf32le
+

[FFmpeg-devel] [PATCH 4/4] swscale/output: add rgbaf32 output support

2022-09-29 Thread mindmark

From: Mark Reid 

---
 libswscale/output.c  | 89 
 libswscale/swscale_unscaled.c|  4 +-
 libswscale/tests/floatimg_cmp.c  |  4 +-
 libswscale/utils.c   | 16 +++--
 libswscale/yuv2rgb.c |  2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |  1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |  1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 tests/ref/fate/sws-floatimg-cmp  | 16 +
 20 files changed, 167 insertions(+), 8 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

diff --git a/libswscale/output.c b/libswscale/output.c
index 0e1c1225a0..b3e064ae8d 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2474,6 +2474,89 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 
+static void
+yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+const int16_t **lumSrcx, int lumFilterSize,
+const int16_t *chrFilter, const int16_t **chrUSrcx,
+const int16_t **chrVSrcx, int chrFilterSize,
+const int16_t **alpSrcx, uint8_t *dest,
+int dstW, int y)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+int i;
+int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx;
+int pixelStep = desc->flags & AV_PIX_FMT_FLAG_ALPHA ? 4 : 3;
+uint32_t *dest32 = (uint32_t*)dest;
+const int32_t **lumSrc  = (const int32_t**)lumSrcx;
+const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+static const float float_mult = 1.0f / 65535.0f;
+
+for (i = 0; i < dstW; i++) {
+int j;
+int Y = -0x4000;
+int U = -(128 << 23);
+int V = -(128 << 23);
+int R, G, B, A;
+
+for (j = 0; j < lumFilterSize; j++)
+Y += lumSrc[j][i] * (unsigned)lumFilter[j];
+
+for (j = 0; j < chrFilterSize; j++) {
+U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+}
+
+Y >>= 14;
+Y += 0x1;
+U >>= 14;
+V >>= 14;
+
+if (hasAlpha) {
+A = -0x4000;
+
+for (j = 0; j < lumFilterSize; j++)
+A += alpSrc[j][i] * (unsigned)lumFilter[j];
+
+A >>= 1;
+A += 0x20002000;
+}
+
+Y -= c->yuv2rgb_y_offset;
+Y *= c->yuv2rgb_y_coeff;
+Y += 1 << 13;
+R = V * c->yuv2rgb_v2r_coeff;
+G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+B =U * c->yuv2rgb_u2b_coeff;
+
+R = av_clip_uintp2(Y + R, 30);
+G = av_clip_uintp2(Y + G, 30);
+B = av_clip_uintp2(Y + B, 30);
+
+dest32[0] = av_float2int(float_mult * (float)(R >> 14));
+dest32[1] = av_float2int(float_mult * (float)(G >> 14));
+dest32[2] = av_float2int(float_mult * (float)(B >> 14));
+if (hasAlpha)
+dest32[3] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 
30) >> 14));
+
+dest32 += pixelStep;
+}
+if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+dest32 = (uint32_t*)dest;
+for (i = 0; i < dstW; i++) {
+dest32[0] = av_bswap32(dest32[0]);
+dest32[1] = av_bswap32(dest32[1]);
+dest32[2] = av_bswap32(dest32[2]);
+if (hasAlpha)
+dest32[3] = av_bswap32(dest32[3]);
+
+dest32 += pixelStep;
+}
+}
+
+}
+
 static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
 const int16_t *ubuf[2], const int16_t *vbuf[2],
@@ -2986,6 +3069,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 }
 break;
 
+case AV_PIX_FMT_RGBF32LE:
+case AV_PIX_FMT_RGBF32BE:
+case AV_PIX_FMT_RGBAF32LE:
+case AV_PIX_FMT_RGBAF32BE:
+*yuv2packedX = yuv2rgbaf32_full_X_c;
+break;
 case AV_PIX_FMT_RGB24:
 *yuv2packedX = yuv2rgb24_full_X_c;
 *yuv2packed2 = yuv2rgb24_full_2_c;

[FFmpeg-devel] [PATCH 3/4] avfilter/vf_transpose: add support for packed rgb float formats

2022-09-29 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_transpose.c | 44 ++
 1 file changed, 44 insertions(+)

diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 469e66729f..1023d6fe82 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -174,6 +174,46 @@ static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t 
src_linesize,
 transpose_block_64_c(src, src_linesize, dst, dst_linesize, 8, 8);
 }
 
+static inline void transpose_block_96_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize,
+int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 12) {
+for (x = 0; x < w; x++) {
+*((uint32_t *)(dst+0 + 12*x)) = *((uint32_t *)(src+0 + 
x*src_linesize));
+*((uint32_t *)(dst+4 + 12*x)) = *((uint32_t *)(src+4 + 
x*src_linesize));
+*((uint32_t *)(dst+8 + 12*x)) = *((uint32_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_96_c(uint8_t *src, ptrdiff_t src_linesize,
+   uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_96_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
+
+static inline void transpose_block_128_c(uint8_t *src, ptrdiff_t src_linesize,
+ uint8_t *dst, ptrdiff_t dst_linesize,
+ int w, int h)
+{
+int x, y;
+for (y = 0; y < h; y++, dst += dst_linesize, src += 16) {
+for (x = 0; x < w; x++) {
+*((uint64_t *)(dst+0 + 16*x)) = *((uint64_t *)(src+0 + 
x*src_linesize));
+*((uint64_t *)(dst+8 + 16*x)) = *((uint64_t *)(src+8 + 
x*src_linesize));
+}
+}
+}
+
+static void transpose_8x8_128_c(uint8_t *src, ptrdiff_t src_linesize,
+uint8_t *dst, ptrdiff_t dst_linesize)
+{
+transpose_block_128_c(src, src_linesize, dst, dst_linesize, 8, 8);
+}
+
 static int config_props_output(AVFilterLink *outlink)
 {
 AVFilterContext *ctx = outlink->src;
@@ -232,6 +272,10 @@ static int config_props_output(AVFilterLink *outlink)
 v->transpose_8x8   = transpose_8x8_48_c; break;
 case 8: v->transpose_block = transpose_block_64_c;
 v->transpose_8x8   = transpose_8x8_64_c; break;
+case 12: v->transpose_block = transpose_block_96_c;
+ v->transpose_8x8   = transpose_8x8_96_c; break;
+case 16: v->transpose_block = transpose_block_128_c;
+ v->transpose_8x8   = transpose_8x8_128_c; break;
 }
 }
 
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 2/4] avfilter/vf_hflip: add support for packed rgb float formats

2022-09-29 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_hflip_init.h | 25 +
 1 file changed, 25 insertions(+)

diff --git a/libavfilter/vf_hflip_init.h b/libavfilter/vf_hflip_init.h
index d0319f463d..31173f73fc 100644
--- a/libavfilter/vf_hflip_init.h
+++ b/libavfilter/vf_hflip_init.h
@@ -86,6 +86,29 @@ static void hflip_qword_c(const uint8_t *ssrc, uint8_t 
*ddst, int w)
 dst[j] = src[-j];
 }
 
+static void hflip_b96_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint32_t *in = (const uint32_t *)ssrc;
+uint32_t *out = (uint32_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 3, in -= 3) {
+out[0] = in[0];
+out[1] = in[1];
+out[2] = in[2];
+}
+}
+
+static void hflip_b128_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+const uint64_t *in = (const uint64_t *)ssrc;
+uint64_t *out = (uint64_t *)ddst;
+
+for (int j = 0; j < w; j++, out += 2, in -= 2) {
+out[0] = in[0];
+out[1] = in[1];
+}
+}
+
 static av_unused int ff_hflip_init(FlipContext *s, int step[4], int nb_planes)
 {
 for (int i = 0; i < nb_planes; i++) {
@@ -97,6 +120,8 @@ static av_unused int ff_hflip_init(FlipContext *s, int 
step[4], int nb_planes)
 case 4: s->flip_line[i] = hflip_dword_c; break;
 case 6: s->flip_line[i] = hflip_b48_c;   break;
 case 8: s->flip_line[i] = hflip_qword_c; break;
+case 12: s->flip_line[i] = hflip_b96_c; break;
+case 16: s->flip_line[i] = hflip_b128_c; break;
 default:
 return AVERROR_BUG;
 }
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/4] swscale/input: add rgbaf32 input support

2022-09-29 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c | 172 +
 libswscale/utils.c |   4 ++
 2 files changed, 176 insertions(+)

diff --git a/libswscale/input.c b/libswscale/input.c
index 7ff7bfaa01..4683284b0b 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, 
const uint8_t *_src, cons
 rgbaf16_funcs_endian(le, 0)
 rgbaf16_funcs_endian(be, 1)
 
+#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): 
av_int2float(AV_RL32(&src)))
+
+static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+ const float *src, int 
width,
+ int32_t *rgb2yuv, int 
comp)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), 0.0f, 
65535.0f))) >> 1;
+int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), 0.0f, 
65535.0f))) >> 1;
+int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), 0.0f, 
65535.0f)) +
+ lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), 0.0f, 
65535.0f))) >> 1;
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t 
*dstV, int is_be,
+const float *src, int width,
+int32_t *rgb2yuv, int comp)
+{
+int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, int32_t *rgb2yuv, 
int comp)
+{
+int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+int i;
+for (i = 0; i < width; i++) {
+int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 
65535.0f));
+int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 
65535.0f));
+int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 
65535.0f));
+
+dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> 
RGB2YUV_SHIFT;
+}
+}
+
+static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float 
*src, int is_be,
+   int width, void *opq)
+{
+int i;
+for (i=0; isrcFormat;
@@ -1570,6 +1700,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->chrToYV12 = rgbaf16leToUV_half_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->chrToYV12 = rgbf32beToUV_half_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->chrToYV12 = rgbaf32beToUV_half_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->chrToYV12 = rgbf32leToUV_half_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->chrToYV12 = rgbaf32leToUV_half_c;
+break;
 }
 } else {
 switch (srcFormat) {
@@ -1663,6 +1805,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->chrToYV12 = rgbaf16leToUV_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+c->chrToYV12 = rgbf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32BE:
+c->chrToYV12 = rgbaf32beToUV_c;
+break;
+case AV_PIX_FMT_RGBF32LE:
+c->chrToYV12 = rgbf32leToUV_c;
+break;
+case AV_PIX_FMT_RGBAF32LE:
+c->chrToYV12 = rgbaf32leToUV_c;
+break;
 }
 }
 
@@ -1973,6 +2127,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_RGBAF16LE:
 c->lumToYV12 = rgbaf16leToY_c;
 break;
+case AV_PIX_FMT_RGBF32BE:
+

[FFmpeg-devel] [PATCH 0/4] swscale rgbaf32 input/output support

2022-09-29 Thread mindmark

From: Mark Reid 

This patch series adds swscale input/output support for the packed rgb float 
formats.
A few of the filters also needed to support the larger 96/128 bit packed pixel
sizes.

I also plan to eventually add lossless unscaled conversions between the planar
and packed formats.

Mark Reid (4):
  swscale/input: add rgbaf32 input support
  avfilter/vf_hflip: add support for packed rgb float formats
  avfilter/vf_transpose: add support for packed rgb float formats
  swscale/output: add rgbaf32 output support

 libavfilter/vf_hflip_init.h  |  25 
 libavfilter/vf_transpose.c   |  44 ++
 libswscale/input.c   | 172 +++
 libswscale/output.c  |  89 
 libswscale/swscale_unscaled.c|   4 +-
 libswscale/tests/floatimg_cmp.c  |   4 +-
 libswscale/utils.c   |  12 +-
 libswscale/yuv2rgb.c |   2 +
 tests/ref/fate/filter-pixdesc-rgbaf32be  |   1 +
 tests/ref/fate/filter-pixdesc-rgbaf32le  |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32be   |   1 +
 tests/ref/fate/filter-pixdesc-rgbf32le   |   1 +
 tests/ref/fate/filter-pixfmts-copy   |   4 +
 tests/ref/fate/filter-pixfmts-crop   |   4 +
 tests/ref/fate/filter-pixfmts-field  |   4 +
 tests/ref/fate/filter-pixfmts-fieldorder |   4 +
 tests/ref/fate/filter-pixfmts-hflip  |   4 +
 tests/ref/fate/filter-pixfmts-il |   4 +
 tests/ref/fate/filter-pixfmts-null   |   4 +
 tests/ref/fate/filter-pixfmts-scale  |   4 +
 tests/ref/fate/filter-pixfmts-transpose  |   4 +
 tests/ref/fate/filter-pixfmts-vflip  |   4 +
 tests/ref/fate/sws-floatimg-cmp  |  16 +++
 23 files changed, 408 insertions(+), 4 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le

--
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v3 2/2] swscale/x86/input.asm: add x86-optimized planer rgb2yuv functions

2021-11-24 Thread mindmark

From: Mark Reid 

sse2 only operates on 2 lanes per loop for to_y and to_uv functions, due
to the lack of a pmulld instruction.  Emulating pmulld with 2 pmuludq and shuffles
proved too costly and made to_uv functions slower than the C implementation.

For to_y on sse2 only float functions are generated;
I was not able to outperform the C implementation on the integer pixel formats.

For to_a on sse4 only the float functions are generated.
sse2 and sse4 generated nearly identically performing code on integer pixel
formats,
so only sse2/avx2 versions are generated.

planar_gbrp_to_y_512_c: 1197.5
planar_gbrp_to_y_512_sse4: 444.5
planar_gbrp_to_y_512_avx2: 287.5
planar_gbrap_to_y_512_c: 1204.5
planar_gbrap_to_y_512_sse4: 447.5
planar_gbrap_to_y_512_avx2: 289.5
planar_gbrp9be_to_y_512_c: 1380.0
planar_gbrp9be_to_y_512_sse4: 543.5
planar_gbrp9be_to_y_512_avx2: 340.0
planar_gbrp9le_to_y_512_c: 1200.5
planar_gbrp9le_to_y_512_sse4: 442.0
planar_gbrp9le_to_y_512_avx2: 282.0
planar_gbrp10be_to_y_512_c: 1378.5
planar_gbrp10be_to_y_512_sse4: 544.0
planar_gbrp10be_to_y_512_avx2: 337.5
planar_gbrp10le_to_y_512_c: 1200.0
planar_gbrp10le_to_y_512_sse4: 448.0
planar_gbrp10le_to_y_512_avx2: 285.5
planar_gbrap10be_to_y_512_c: 1380.0
planar_gbrap10be_to_y_512_sse4: 542.0
planar_gbrap10be_to_y_512_avx2: 340.5
planar_gbrap10le_to_y_512_c: 1199.0
planar_gbrap10le_to_y_512_sse4: 446.0
planar_gbrap10le_to_y_512_avx2: 289.5
planar_gbrp12be_to_y_512_c: 10563.0
planar_gbrp12be_to_y_512_sse4: 542.5
planar_gbrp12be_to_y_512_avx2: 339.0
planar_gbrp12le_to_y_512_c: 1201.0
planar_gbrp12le_to_y_512_sse4: 440.5
planar_gbrp12le_to_y_512_avx2: 286.0
planar_gbrap12be_to_y_512_c: 1701.5
planar_gbrap12be_to_y_512_sse4: 917.0
planar_gbrap12be_to_y_512_avx2: 338.5
planar_gbrap12le_to_y_512_c: 1201.0
planar_gbrap12le_to_y_512_sse4: 444.5
planar_gbrap12le_to_y_512_avx2: 288.0
planar_gbrp14be_to_y_512_c: 1370.5
planar_gbrp14be_to_y_512_sse4: 545.0
planar_gbrp14be_to_y_512_avx2: 338.5
planar_gbrp14le_to_y_512_c: 1199.0
planar_gbrp14le_to_y_512_sse4: 444.0
planar_gbrp14le_to_y_512_avx2: 279.5
planar_gbrp16be_to_y_512_c: 1364.0
planar_gbrp16be_to_y_512_sse4: 544.5
planar_gbrp16be_to_y_512_avx2: 339.5
planar_gbrp16le_to_y_512_c: 1201.0
planar_gbrp16le_to_y_512_sse4: 445.5
planar_gbrp16le_to_y_512_avx2: 280.5
planar_gbrap16be_to_y_512_c: 1377.0
planar_gbrap16be_to_y_512_sse4: 545.0
planar_gbrap16be_to_y_512_avx2: 338.5
planar_gbrap16le_to_y_512_c: 1201.0
planar_gbrap16le_to_y_512_sse4: 442.0
planar_gbrap16le_to_y_512_avx2: 279.0
planar_gbrpf32be_to_y_512_c: 4113.0
planar_gbrpf32be_to_y_512_sse2: 2438.0
planar_gbrpf32be_to_y_512_sse4: 1068.0
planar_gbrpf32be_to_y_512_avx2: 904.5
planar_gbrpf32le_to_y_512_c: 3818.5
planar_gbrpf32le_to_y_512_sse2: 2024.5
planar_gbrpf32le_to_y_512_sse4: 1241.5
planar_gbrpf32le_to_y_512_avx2: 657.0
planar_gbrapf32be_to_y_512_c: 3707.0
planar_gbrapf32be_to_y_512_sse2: 2444.0
planar_gbrapf32be_to_y_512_sse4: 1077.0
planar_gbrapf32be_to_y_512_avx2: 909.0
planar_gbrapf32le_to_y_512_c: 3822.0
planar_gbrapf32le_to_y_512_sse2: 2024.5
planar_gbrapf32le_to_y_512_sse4: 1176.0
planar_gbrapf32le_to_y_512_avx2: 658.5

planar_gbrp_to_uv_512_c: 2325.8
planar_gbrp_to_uv_512_sse2: 1726.8
planar_gbrp_to_uv_512_sse4: 771.8
planar_gbrp_to_uv_512_avx2: 506.8
planar_gbrap_to_uv_512_c: 2281.8
planar_gbrap_to_uv_512_sse2: 1726.3
planar_gbrap_to_uv_512_sse4: 768.3
planar_gbrap_to_uv_512_avx2: 496.3
planar_gbrp9be_to_uv_512_c: 2336.8
planar_gbrp9be_to_uv_512_sse2: 1924.8
planar_gbrp9be_to_uv_512_sse4: 852.3
planar_gbrp9be_to_uv_512_avx2: 552.8
planar_gbrp9le_to_uv_512_c: 2270.3
planar_gbrp9le_to_uv_512_sse2: 1512.3
planar_gbrp9le_to_uv_512_sse4: 764.3
planar_gbrp9le_to_uv_512_avx2: 491.3
planar_gbrp10be_to_uv_512_c: 2281.8
planar_gbrp10be_to_uv_512_sse2: 1917.8
planar_gbrp10be_to_uv_512_sse4: 855.3
planar_gbrp10be_to_uv_512_avx2: 541.3
planar_gbrp10le_to_uv_512_c: 2269.8
planar_gbrp10le_to_uv_512_sse2: 1515.3
planar_gbrp10le_to_uv_512_sse4: 759.8
planar_gbrp10le_to_uv_512_avx2: 487.8
planar_gbrap10be_to_uv_512_c: 2382.3
planar_gbrap10be_to_uv_512_sse2: 1924.8
planar_gbrap10be_to_uv_512_sse4: 855.3
planar_gbrap10be_to_uv_512_avx2: 540.8
planar_gbrap10le_to_uv_512_c: 2382.3
planar_gbrap10le_to_uv_512_sse2: 1512.3
planar_gbrap10le_to_uv_512_sse4: 759.3
planar_gbrap10le_to_uv_512_avx2: 484.8
planar_gbrp12be_to_uv_512_c: 2283.8
planar_gbrp12be_to_uv_512_sse2: 1936.8
planar_gbrp12be_to_uv_512_sse4: 858.3
planar_gbrp12be_to_uv_512_avx2: 541.3
planar_gbrp12le_to_uv_512_c: 2278.8
planar_gbrp12le_to_uv_512_sse2: 1507.3
planar_gbrp12le_to_uv_512_sse4: 760.3
planar_gbrp12le_to_uv_512_avx2: 485.8
planar_gbrap12be_to_uv_512_c: 2385.3
planar_gbrap12be_to_uv_512_sse2: 1927.8
planar_gbrap12be_to_uv_512_sse4: 855.3
planar_gbrap12be_to_uv_512_avx2: 539.8
planar_gbrap12le_to_uv_512_c: 2377.3
planar_gbrap12le_to_uv_512_sse2: 1516.3
planar_gbrap12le_to_uv_512_sse4: 759.3
planar_gbrap12le_to_uv_512_avx2: 484.8
planar_gbrp14be_to_uv_512_c: 2283.8
planar_gbrp14be_to_uv_512_sse2:

[FFmpeg-devel] [PATCH v3 1/2] swscale/x86/output.asm: add x86-optimized planer gbr yuv2anyX functions

2021-11-24 Thread mindmark

From: Mark Reid 

changes since v2:
 * fixed label
changes since v1:
 * remove vex instruction on sse4 path
 * some load/pack macros use fewer instructions
 * fixed some typos

yuv2gbrp_full_X_4_512_c: 12757.6
yuv2gbrp_full_X_4_512_sse2: 8946.6
yuv2gbrp_full_X_4_512_sse4: 5138.6
yuv2gbrp_full_X_4_512_avx2: 3889.6
yuv2gbrap_full_X_4_512_c: 15368.6
yuv2gbrap_full_X_4_512_sse2: 11916.1
yuv2gbrap_full_X_4_512_sse4: 6294.6
yuv2gbrap_full_X_4_512_avx2: 3477.1
yuv2gbrp9be_full_X_4_512_c: 14381.6
yuv2gbrp9be_full_X_4_512_sse2: 9139.1
yuv2gbrp9be_full_X_4_512_sse4: 5150.1
yuv2gbrp9be_full_X_4_512_avx2: 2834.6
yuv2gbrp9le_full_X_4_512_c: 12990.1
yuv2gbrp9le_full_X_4_512_sse2: 9118.1
yuv2gbrp9le_full_X_4_512_sse4: 5132.1
yuv2gbrp9le_full_X_4_512_avx2: 2833.1
yuv2gbrp10be_full_X_4_512_c: 14401.6
yuv2gbrp10be_full_X_4_512_sse2: 9133.1
yuv2gbrp10be_full_X_4_512_sse4: 5126.1
yuv2gbrp10be_full_X_4_512_avx2: 2837.6
yuv2gbrp10le_full_X_4_512_c: 12718.1
yuv2gbrp10le_full_X_4_512_sse2: 9106.1
yuv2gbrp10le_full_X_4_512_sse4: 5120.1
yuv2gbrp10le_full_X_4_512_avx2: 2826.1
yuv2gbrap10be_full_X_4_512_c: 18535.6
yuv2gbrap10be_full_X_4_512_sse2: 33617.6
yuv2gbrap10be_full_X_4_512_sse4: 6264.1
yuv2gbrap10be_full_X_4_512_avx2: 3422.1
yuv2gbrap10le_full_X_4_512_c: 16724.1
yuv2gbrap10le_full_X_4_512_sse2: 11787.1
yuv2gbrap10le_full_X_4_512_sse4: 6282.1
yuv2gbrap10le_full_X_4_512_avx2: 3441.6
yuv2gbrp12be_full_X_4_512_c: 13723.6
yuv2gbrp12be_full_X_4_512_sse2: 9128.1
yuv2gbrp12be_full_X_4_512_sse4: 7997.6
yuv2gbrp12be_full_X_4_512_avx2: 2844.1
yuv2gbrp12le_full_X_4_512_c: 12257.1
yuv2gbrp12le_full_X_4_512_sse2: 9107.6
yuv2gbrp12le_full_X_4_512_sse4: 5142.6
yuv2gbrp12le_full_X_4_512_avx2: 2837.6
yuv2gbrap12be_full_X_4_512_c: 18511.1
yuv2gbrap12be_full_X_4_512_sse2: 12156.6
yuv2gbrap12be_full_X_4_512_sse4: 6251.1
yuv2gbrap12be_full_X_4_512_avx2: 3444.6
yuv2gbrap12le_full_X_4_512_c: 16687.1
yuv2gbrap12le_full_X_4_512_sse2: 11785.1
yuv2gbrap12le_full_X_4_512_sse4: 6243.6
yuv2gbrap12le_full_X_4_512_avx2: 3446.1
yuv2gbrp14be_full_X_4_512_c: 13690.6
yuv2gbrp14be_full_X_4_512_sse2: 9120.6
yuv2gbrp14be_full_X_4_512_sse4: 5138.1
yuv2gbrp14be_full_X_4_512_avx2: 2843.1
yuv2gbrp14le_full_X_4_512_c: 14995.6
yuv2gbrp14le_full_X_4_512_sse2: 9119.1
yuv2gbrp14le_full_X_4_512_sse4: 5126.1
yuv2gbrp14le_full_X_4_512_avx2: 2843.1
yuv2gbrp16be_full_X_4_512_c: 12367.1
yuv2gbrp16be_full_X_4_512_sse2: 8233.6
yuv2gbrp16be_full_X_4_512_sse4: 4820.1
yuv2gbrp16be_full_X_4_512_avx2: 2666.6
yuv2gbrp16le_full_X_4_512_c: 10904.1
yuv2gbrp16le_full_X_4_512_sse2: 8214.1
yuv2gbrp16le_full_X_4_512_sse4: 4824.1
yuv2gbrp16le_full_X_4_512_avx2: 2629.1
yuv2gbrap16be_full_X_4_512_c: 26569.6
yuv2gbrap16be_full_X_4_512_sse2: 10884.1
yuv2gbrap16be_full_X_4_512_sse4: 5488.1
yuv2gbrap16be_full_X_4_512_avx2: 3272.1
yuv2gbrap16le_full_X_4_512_c: 14010.1
yuv2gbrap16le_full_X_4_512_sse2: 10562.1
yuv2gbrap16le_full_X_4_512_sse4: 5463.6
yuv2gbrap16le_full_X_4_512_avx2: 3255.1
yuv2gbrpf32be_full_X_4_512_c: 14524.1
yuv2gbrpf32be_full_X_4_512_sse2: 8552.6
yuv2gbrpf32be_full_X_4_512_sse4: 4636.1
yuv2gbrpf32be_full_X_4_512_avx2: 2474.6
yuv2gbrpf32le_full_X_4_512_c: 13060.6
yuv2gbrpf32le_full_X_4_512_sse2: 9682.6
yuv2gbrpf32le_full_X_4_512_sse4: 4298.1
yuv2gbrpf32le_full_X_4_512_avx2: 2453.1
yuv2gbrapf32be_full_X_4_512_c: 18629.6
yuv2gbrapf32be_full_X_4_512_sse2: 11363.1
yuv2gbrapf32be_full_X_4_512_sse4: 15201.6
yuv2gbrapf32be_full_X_4_512_avx2: 3727.1
yuv2gbrapf32le_full_X_4_512_c: 16677.6
yuv2gbrapf32le_full_X_4_512_sse2: 10221.6
yuv2gbrapf32le_full_X_4_512_sse4: 5693.6
yuv2gbrapf32le_full_X_4_512_avx2: 3656.6
---
 libswscale/x86/output.asm | 434 +-
 libswscale/x86/swscale.c  |  98 +
 tests/checkasm/Makefile   |   2 +-
 tests/checkasm/checkasm.c |   1 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/sw_gbrp.c  | 198 +
 tests/fate/checkasm.mak   |   1 +
 7 files changed, 733 insertions(+), 2 deletions(-)
 create mode 100644 tests/checkasm/sw_gbrp.c

diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 52cf9f2c2e..e277a61449 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -38,7 +38,49 @@ pw_32: times 8 dw 32
 pd_255:times 8 dd 255
 pw_512:times 8 dw 512
 pw_1024:   times 8 dw 1024
-
+pd_65535_invf: times 8 dd 0x37800080 ;1.0/65535.0
+pd_yuv2gbrp16_start:   times 8 dd -0x4000
+pd_yuv2gbrp_y_start:   times 8 dd  (1 << 9)
+pd_yuv2gbrp_uv_start:  times 8 dd  ((1 << 9) - (128 << 19))
+pd_yuv2gbrp_a_start:   times 8 dd  (1 << 18)
+pd_yuv2gbrp16_offset:  times 8 dd  0x1  ;(1 << 16)
+pd_yuv2gbrp16_round13: times 8 dd  0x02000  ;(1 << 13)
+pd_yuv2gbrp16_a_offset:times 8 dd  0x20002000
+pd_yuv2gbrp16_upper30: times 8 dd  0x3FFF ;(1<<30) - 1
+pd_yuv2gbrp16_upper27: times 8 dd  0x07FF ;(1<<27) - 1
+pd_yuv2gbrp16_upperC:  times 8 dd  0xC000
+pb_pack_shuffle8:   db  0,  4,  8, 12, \
+

[FFmpeg-devel] [PATCH v3 2/2] swscale/input: clip rgbf32 values before lrintf

2021-11-14 Thread mindmark

From: Mark Reid 

if the float pixel * 65535.0f > 2147483647.0f
lrintf may overflow and return negative values, depending on implementation.
the result for nan and +/-inf values may also be implementation defined

clip the value first so lrintf always works.

values < 0.0f, -inf, nan = 0.0f
values > 65535.0f, +inf  = 65535.0f

old timings
 195960 decicycles in planar_rgbf32le_to_uv,   1 runs,  0 skips
 186120 decicycles in planar_rgbf32le_to_uv,   2 runs,  0 skips
 188645 decicycles in planar_rgbf32le_to_uv,   4 runs,  0 skips
 183625 decicycles in planar_rgbf32le_to_uv,   8 runs,  0 skips
 181157 decicycles in planar_rgbf32le_to_uv,  16 runs,  0 skips
 177533 decicycles in planar_rgbf32le_to_uv,  32 runs,  0 skips
 175689 decicycles in planar_rgbf32le_to_uv,  64 runs,  0 skips

 232960 decicycles in planar_rgbf32be_to_uv,   1 runs,  0 skips
 221380 decicycles in planar_rgbf32be_to_uv,   2 runs,  0 skips
 216640 decicycles in planar_rgbf32be_to_uv,   4 runs,  0 skips
 213505 decicycles in planar_rgbf32be_to_uv,   8 runs,  0 skips
 211558 decicycles in planar_rgbf32be_to_uv,  16 runs,  0 skips
 210596 decicycles in planar_rgbf32be_to_uv,  32 runs,  0 skips
 210202 decicycles in planar_rgbf32be_to_uv,  64 runs,  0 skips

 161680 decicycles in planar_rgbf32le_to_y,   1 runs,  0 skips
 153540 decicycles in planar_rgbf32le_to_y,   2 runs,  0 skips
 148255 decicycles in planar_rgbf32le_to_y,   4 runs,  0 skips
 140600 decicycles in planar_rgbf32le_to_y,   8 runs,  0 skips
 132935 decicycles in planar_rgbf32le_to_y,  16 runs,  0 skips
 128531 decicycles in planar_rgbf32le_to_y,  32 runs,  0 skips
 140933 decicycles in planar_rgbf32le_to_y,  64 runs,  0 skips

 190980 decicycles in planar_rgbf32be_to_y,   1 runs,  0 skips
 176080 decicycles in planar_rgbf32be_to_y,   2 runs,  0 skips
 167980 decicycles in planar_rgbf32be_to_y,   4 runs,  0 skips
 164685 decicycles in planar_rgbf32be_to_y,   8 runs,  0 skips
 162751 decicycles in planar_rgbf32be_to_y,  16 runs,  0 skips
 162404 decicycles in planar_rgbf32be_to_y,  32 runs,  0 skips
 167849 decicycles in planar_rgbf32be_to_y,  64 runs,  0 skips

new timings
 183320 decicycles in planar_rgbf32le_to_uv,   1 runs,  0 skips
 175700 decicycles in planar_rgbf32le_to_uv,   2 runs,  0 skips
 179570 decicycles in planar_rgbf32le_to_uv,   4 runs,  0 skips
 172932 decicycles in planar_rgbf32le_to_uv,   8 runs,  0 skips
 168707 decicycles in planar_rgbf32le_to_uv,  16 runs,  0 skips
 165224 decicycles in planar_rgbf32le_to_uv,  32 runs,  0 skips
 163423 decicycles in planar_rgbf32le_to_uv,  64 runs,  0 skips

 184940 decicycles in planar_rgbf32be_to_uv,   1 runs,  0 skips
 185150 decicycles in planar_rgbf32be_to_uv,   2 runs,  0 skips
 185790 decicycles in planar_rgbf32be_to_uv,   4 runs,  0 skips
 185472 decicycles in planar_rgbf32be_to_uv,   8 runs,  0 skips
 185277 decicycles in planar_rgbf32be_to_uv,  16 runs,  0 skips
 185813 decicycles in planar_rgbf32be_to_uv,  32 runs,  0 skips
 185332 decicycles in planar_rgbf32be_to_uv,  64 runs,  0 skips

 145400 decicycles in planar_rgbf32le_to_y,   1 runs,  0 skips
 145100 decicycles in planar_rgbf32le_to_y,   2 runs,  0 skips
 143490 decicycles in planar_rgbf32le_to_y,   4 runs,  0 skips
 136687 decicycles in planar_rgbf32le_to_y,   8 runs,  0 skips
 131271 decicycles in planar_rgbf32le_to_y,  16 runs,  0 skips
 128698 decicycles in planar_rgbf32le_to_y,  32 runs,  0 skips
 127170 decicycles in planar_rgbf32le_to_y,  64 runs,  0 skips

 156020 decicycles in planar_rgbf32be_to_y,   1 runs,  0 skips
 146990 decicycles in planar_rgbf32be_to_y,   2 runs,  0 skips
 142020 decicycles in planar_rgbf32be_to_y,   4 runs,  0 skips
 141052 decicycles in planar_rgbf32be_to_y,   8 runs,  0 skips
 138973 decicycles in planar_rgbf32be_to_y,  16 runs,  0 skips
 138027 decicycles in planar_rgbf32be_to_y,  32 runs,  0 skips
 143939 decicycles in planar_rgbf32be_to_y,  64 runs,  0 skips

---
 libswscale/input.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 90efdd2ffc..1351ea5bd4 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -973,7 +973,7 @@ static av_always_inline void planar_rgbf32_to_a(uint8_t 
*_dst, const uint8_t *_s
 uint16_t *dst= (uint16_t *)_dst;

 for (i = 0; i < width; i++) {
-dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src[3] + i)));
+dst[i] = lrintf(av_clipf(65535.0f * rdpx(src[3] + i), 0.0f, 65535.0f));
 }
 }

@@ -987,9 +987,9 @@ static av_always_inline void planar_rgbf

[FFmpeg-devel] [PATCH v3 1/2] libavutil/common: clip nan value to amin

2021-11-14 Thread mindmark

From: Mark Reid 

Changes av_clipf to return amin if a is nan.
Previously, if a was nan, av_clipf_c returned nan and
av_clipf_sse returned amax. Now both
should behave the same.

This works because nan > amin is false.
The max(nan, amin) will be amin.
---
 libavutil/common.h  | 12 ++--
 libavutil/x86/intmath.h |  8 
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/libavutil/common.h b/libavutil/common.h
index 3cc1f07566..9338bda7d5 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -379,6 +379,8 @@ static av_always_inline int64_t av_sat_sub64_c(int64_t a, 
int64_t b) {

 /**
  * Clip a float value into the amin-amax range.
+ * If a is nan or -inf amin will be returned.
+ * If a is +inf amax will be returned.
  * @param a value to clip
  * @param amin minimum value of the clip range
  * @param amax maximum value of the clip range
@@ -389,13 +391,13 @@ static av_always_inline av_const float av_clipf_c(float 
a, float amin, float ama
 #if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
 if (amin > amax) abort();
 #endif
-if  (a < amin) return amin;
-else if (a > amax) return amax;
-else   return a;
+return FFMIN(FFMAX(a, amin), amax);
 }

 /**
  * Clip a double value into the amin-amax range.
+ * If a is nan or -inf amin will be returned.
+ * If a is +inf amax will be returned.
  * @param a value to clip
  * @param amin minimum value of the clip range
  * @param amax maximum value of the clip range
@@ -406,9 +408,7 @@ static av_always_inline av_const double av_clipd_c(double 
a, double amin, double
 #if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
 if (amin > amax) abort();
 #endif
-if  (a < amin) return amin;
-else if (a > amax) return amax;
-else   return a;
+return FFMIN(FFMAX(a, amin), amax);
 }

 /** Compute ceil(log2(x)).
diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index 40743fd13e..1520c25ec9 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h
@@ -110,8 +110,8 @@ static av_always_inline av_const double 
av_clipd_sse2(double a, double amin, dou
 #if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
 if (amin > amax) abort();
 #endif
-__asm__ ("minsd %2, %0 \n\t"
- "maxsd %1, %0 \n\t"
+__asm__ ("maxsd %1, %0 \n\t"
+ "minsd %2, %0 \n\t"
  : "+&x"(a) : "xm"(amin), "xm"(amax));
 return a;
 }
@@ -126,8 +126,8 @@ static av_always_inline av_const float av_clipf_sse(float 
a, float amin, float a
 #if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
 if (amin > amax) abort();
 #endif
-__asm__ ("minss %2, %0 \n\t"
- "maxss %1, %0 \n\t"
+__asm__ ("maxss %1, %0 \n\t"
+ "minss %2, %0 \n\t"
  : "+&x"(a) : "xm"(amin), "xm"(amax));
 return a;
 }
--
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2 2/2] swscale/input: clamp rgbf32 values before lrintf

2021-11-13 Thread mindmark

From: Mark Reid 

if the float pixel * 65535.0f > 2147483647.0f
lrintf may overflow and return negative values, depending on implementation.
nan and +/-inf values may also be implementation defined

clamp the value first, so lrintf always works.

values <=0.0f, -inf, nan = 0.0f
values >=1.0f, +inf  = 1.0f

the clamping adds some performance overhead, but using an inline function
seems to help the compiler optimize on the compilers I tested.

old timings
 213920 UNITS in planar_rgbf32le_to_uv,   1 runs,  0 skips
 218830 UNITS in planar_rgbf32le_to_uv,   2 runs,  0 skips
 223285 UNITS in planar_rgbf32le_to_uv,   4 runs,  0 skips
 215405 UNITS in planar_rgbf32le_to_uv,   8 runs,  0 skips
 208920 UNITS in planar_rgbf32le_to_uv,  16 runs,  0 skips
 205115 UNITS in planar_rgbf32le_to_uv,  32 runs,  0 skips
 212220 UNITS in planar_rgbf32le_to_uv,  64 runs,  0 skips

 216440 UNITS in planar_rgbf32be_to_uv,   1 runs,  0 skips
 222450 UNITS in planar_rgbf32be_to_uv,   2 runs,  0 skips
 228780 UNITS in planar_rgbf32be_to_uv,   4 runs,  0 skips
 226900 UNITS in planar_rgbf32be_to_uv,   8 runs,  0 skips
 223168 UNITS in planar_rgbf32be_to_uv,  16 runs,  0 skips
 249340 UNITS in planar_rgbf32be_to_uv,  32 runs,  0 skips
 233746 UNITS in planar_rgbf32be_to_uv,  64 runs,  0 skips

 173360 UNITS in planar_rgbf32le_to_y,   1 runs,  0 skips
 179970 UNITS in planar_rgbf32le_to_y,   2 runs,  0 skips
 182960 UNITS in planar_rgbf32le_to_y,   4 runs,  0 skips
 177040 UNITS in planar_rgbf32le_to_y,   8 runs,  0 skips
 170351 UNITS in planar_rgbf32le_to_y,  16 runs,  0 skips
 167136 UNITS in planar_rgbf32le_to_y,  32 runs,  0 skips
 165821 UNITS in planar_rgbf32le_to_y,  64 runs,  0 skips

 181040 UNITS in planar_rgbf32be_to_y,   1 runs,  0 skips
 182920 UNITS in planar_rgbf32be_to_y,   2 runs,  0 skips
 180935 UNITS in planar_rgbf32be_to_y,   4 runs,  0 skips
 180897 UNITS in planar_rgbf32be_to_y,   8 runs,  0 skips
 179640 UNITS in planar_rgbf32be_to_y,  16 runs,  0 skips
 178912 UNITS in planar_rgbf32be_to_y,  32 runs,  0 skips
 177983 UNITS in planar_rgbf32be_to_y,  64 runs,  0 skips

new timings
 228860 UNITS in planar_rgbf32le_to_uv,   1 runs,  0 skips
 232400 UNITS in planar_rgbf32le_to_uv,   2 runs,  0 skips
 237270 UNITS in planar_rgbf32le_to_uv,   4 runs,  0 skips
 229992 UNITS in planar_rgbf32le_to_uv,   8 runs,  0 skips
 70 UNITS in planar_rgbf32le_to_uv,  16 runs,  0 skips
 218896 UNITS in planar_rgbf32le_to_uv,  32 runs,  0 skips
 216938 UNITS in planar_rgbf32le_to_uv,  64 runs,  0 skips

 232340 UNITS in planar_rgbf32be_to_uv,   1 runs,  0 skips
 231830 UNITS in planar_rgbf32be_to_uv,   2 runs,  0 skips
 242235 UNITS in planar_rgbf32be_to_uv,   4 runs,  0 skips
 235210 UNITS in planar_rgbf32be_to_uv,   8 runs,  0 skips
 229040 UNITS in planar_rgbf32be_to_uv,  16 runs,  0 skips
 224996 UNITS in planar_rgbf32be_to_uv,  32 runs,  0 skips
 223581 UNITS in planar_rgbf32be_to_uv,  64 runs,  0 skips

 179220 UNITS in planar_rgbf32le_to_y,   1 runs,  0 skips
 174790 UNITS in planar_rgbf32le_to_y,   2 runs,  0 skips
 182630 UNITS in planar_rgbf32le_to_y,   4 runs,  0 skips
 183002 UNITS in planar_rgbf32le_to_y,   8 runs,  0 skips
 181005 UNITS in planar_rgbf32le_to_y,  16 runs,  0 skips
 179390 UNITS in planar_rgbf32le_to_y,  32 runs,  0 skips
 192476 UNITS in planar_rgbf32le_to_y,  64 runs,  0 skips

 195620 UNITS in planar_rgbf32be_to_y,   1 runs,  0 skips
 195860 UNITS in planar_rgbf32be_to_y,   2 runs,  0 skips
 198700 UNITS in planar_rgbf32be_to_y,   4 runs,  0 skips
 197252 UNITS in planar_rgbf32be_to_y,   8 runs,  0 skips
 195702 UNITS in planar_rgbf32be_to_y,  16 runs,  0 skips
 194853 UNITS in planar_rgbf32be_to_y,  32 runs,  0 skips
 194459 UNITS in planar_rgbf32be_to_y,  64 runs,  0 skips
---
 libswscale/input.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 90efdd2ffc..2a13846abe 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -966,6 +966,11 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t 
*_dstU, uint8_t *_dstV,

 #define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): 
av_int2float(AV_RL32(src)))

+static av_always_inline float clampf(float x, float min, float max)
+{
+return FFMIN(FFMAX(x, min), max);
+}
+
 static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t 
*_src[4], int width, int is_be, int32_t *rgb2yuv)
 {
 int i;
@@ -973,7 +978,7 @@ static av_always_inline void planar_rgbf32_to_a(uint8_t 
*_dst, const uint8_t *_s

[FFmpeg-devel] [PATCH v2 1/2] swscale/input: unify grayf32 funcs with rgbf32 funcs

2021-11-13 Thread mindmark

From: Mark Reid 

This is meant to be a cosmetic change

old timings:
  42780 UNITS in grayf32le,   1 runs,  0 skips
  56720 UNITS in grayf32le,   2 runs,  0 skips
  67265 UNITS in grayf32le,   4 runs,  0 skips
  58082 UNITS in grayf32le,   8 runs,  0 skips
  63512 UNITS in grayf32le,  16 runs,  0 skips
  52720 UNITS in grayf32le,  32 runs,  0 skips
  46491 UNITS in grayf32le,  64 runs,  0 skips

  68500 UNITS in grayf32be,   1 runs,  0 skips
  66930 UNITS in grayf32be,   2 runs,  0 skips
  62305 UNITS in grayf32be,   4 runs,  0 skips
  55510 UNITS in grayf32be,   8 runs,  0 skips
  50216 UNITS in grayf32be,  16 runs,  0 skips
  44480 UNITS in grayf32be,  32 runs,  0 skips
  42394 UNITS in grayf32be,  64 runs,  0 skips

new timings:
  46660 UNITS in grayf32le,   1 runs,  0 skips
  51830 UNITS in grayf32le,   2 runs,  0 skips
  53390 UNITS in grayf32le,   4 runs,  0 skips
  50910 UNITS in grayf32le,   8 runs,  0 skips
  44968 UNITS in grayf32le,  16 runs,  0 skips
  40349 UNITS in grayf32le,  32 runs,  0 skips
  38330 UNITS in grayf32le,  64 runs,  0 skips

  39980 UNITS in grayf32be,   1 runs,  0 skips
  49630 UNITS in grayf32be,   2 runs,  0 skips
  53540 UNITS in grayf32be,   4 runs,  0 skips
  59767 UNITS in grayf32be,   8 runs,  0 skips
  51206 UNITS in grayf32be,  16 runs,  0 skips
  44743 UNITS in grayf32be,  32 runs,  0 skips
  41468 UNITS in grayf32be,  64 runs,  0 skips
---
 libswscale/input.c | 36 +++-
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 336f957c8c..90efdd2ffc 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1013,31 +1013,19 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t 
*_dst, const uint8_t *_s
 }
 }

-#undef rdpx
-
 static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t 
*_src, const uint8_t *unused1,
-const uint8_t *unused2, int width, 
uint32_t *unused)
+const uint8_t *unused2, int width, 
int is_be, uint32_t *unused)
 {
 int i;
 const float *src = (const float *)_src;
 uint16_t *dst= (uint16_t *)_dst;

 for (i = 0; i < width; ++i){
-dst[i] = av_clip_uint16(lrintf(65535.0f * src[i]));
+dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i)));
 }
 }

-static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t 
*_src, const uint8_t *unused1,
-  const uint8_t *unused2, int 
width, uint32_t *unused)
-{
-int i;
-const uint32_t *src = (const uint32_t *)_src;
-uint16_t *dst= (uint16_t *)_dst;
-
-for (i = 0; i < width; ++i){
-dst[i] = av_clip_uint16(lrintf(65535.0f * 
av_int2float(av_bswap32(src[i];
-}
-}
+#undef rdpx

 #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian)   
 \
 static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t 
*src[4],  \
@@ -1092,6 +1080,12 @@ static void planar_rgbf32##endian_name##_to_a(uint8_t 
*dst, const uint8_t *src[4
   int w, int32_t *rgb2yuv) 
 \
 {  
 \
 planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv);  
 \
+}  
 \
+static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src,
 \
+  const uint8_t *unused1, const 
uint8_t *unused2,   \
+  int width, uint32_t *unused) 
 \
+{  
 \
+grayf32ToY16_c(dst, src, unused1, unused2, width, endian, unused); 
 \
 }

 rgbf32_planar_funcs_endian(le, 0)
@@ -1699,18 +1693,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 c->lumToYV12 = p010BEToY_c;
 break;
 case AV_PIX_FMT_GRAYF32LE:
-#if HAVE_BIGENDIAN
-c->lumToYV12 = grayf32ToY16_bswap_c;
-#else
-c->lumToYV12 = grayf32ToY16_c;
-#endif
+c->lumToYV12 = grayf32leToY16_c;
 break;
 case AV_PIX_FMT_GRAYF32BE:
-#if HAVE_BIGENDIAN
-c->lumToYV12 = grayf32ToY16_c;
-#else
-c->lumToYV12 = grayf32ToY16_bswap_c;
-#endif
+c->lumToYV12 = grayf32beToY16_c;
 break;
 case AV_PIX_FMT_Y210LE:
 c->lumToYV12 = y210le_Y_c;
--
2.31.1.window

[FFmpeg-devel] [PATCH v2 1/2] swscale/input: unify grayf32 funcs with rgbf32 funcs

2021-11-07 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c | 36 +++-
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 336f957c8c..90efdd2ffc 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1013,31 +1013,19 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t 
*_dst, const uint8_t *_s
 }
 }
 
-#undef rdpx
-
 static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t 
*_src, const uint8_t *unused1,
-const uint8_t *unused2, int width, 
uint32_t *unused)
+const uint8_t *unused2, int width, 
int is_be, uint32_t *unused)
 {
 int i;
 const float *src = (const float *)_src;
 uint16_t *dst= (uint16_t *)_dst;
 
 for (i = 0; i < width; ++i){
-dst[i] = av_clip_uint16(lrintf(65535.0f * src[i]));
+dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i)));
 }
 }
 
-static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t 
*_src, const uint8_t *unused1,
-  const uint8_t *unused2, int 
width, uint32_t *unused)
-{
-int i;
-const uint32_t *src = (const uint32_t *)_src;
-uint16_t *dst= (uint16_t *)_dst;
-
-for (i = 0; i < width; ++i){
-dst[i] = av_clip_uint16(lrintf(65535.0f * 
av_int2float(av_bswap32(src[i];
-}
-}
+#undef rdpx
 
 #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian)   
 \
 static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t 
*src[4],  \
@@ -1092,6 +1080,12 @@ static void planar_rgbf32##endian_name##_to_a(uint8_t 
*dst, const uint8_t *src[4
   int w, int32_t *rgb2yuv) 
 \
 {  
 \
 planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv);  
 \
+}  
 \
+static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src,
 \
+  const uint8_t *unused1, const 
uint8_t *unused2,   \
+  int width, uint32_t *unused) 
 \
+{  
 \
+grayf32ToY16_c(dst, src, unused1, unused2, width, endian, unused); 
 \
 }
 
 rgbf32_planar_funcs_endian(le, 0)
@@ -1699,18 +1693,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 c->lumToYV12 = p010BEToY_c;
 break;
 case AV_PIX_FMT_GRAYF32LE:
-#if HAVE_BIGENDIAN
-c->lumToYV12 = grayf32ToY16_bswap_c;
-#else
-c->lumToYV12 = grayf32ToY16_c;
-#endif
+c->lumToYV12 = grayf32leToY16_c;
 break;
 case AV_PIX_FMT_GRAYF32BE:
-#if HAVE_BIGENDIAN
-c->lumToYV12 = grayf32ToY16_c;
-#else
-c->lumToYV12 = grayf32ToY16_bswap_c;
-#endif
+c->lumToYV12 = grayf32beToY16_c;
 break;
 case AV_PIX_FMT_Y210LE:
 c->lumToYV12 = y210le_Y_c;
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2 2/2] swscale/input: clamp rgbf32 values between 0, 1 before scaling

2021-11-07 Thread mindmark

From: Mark Reid 

if the float pixel * 65535.0f > 2147483647.0f
lrintf may overflow and return negative values, depending on implementation.
nan and +/-inf values may also be implementation defined

clamp the values between 0,1 before scaling, so lrintf
always works.

values <=0.0f, -inf, nan = 0.0f
values >=1.0f, +inf  = 1.0f
---
 libswscale/input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 90efdd2ffc..fc8242a758 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -964,7 +964,7 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t 
*_dstU, uint8_t *_dstV,
 }
 #undef rdpx
 
-#define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): 
av_int2float(AV_RL32(src)))
+#define rdpx(src) (FFMIN(FFMAX(is_be ? av_int2float(AV_RB32(src)): 
av_int2float(AV_RL32(src)), 0.0f), 1.0f))
 
 static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t 
*_src[4], int width, int is_be, int32_t *rgb2yuv)
 {
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] swscale/input: clamp rgbf32 values between 0, 1 before scaling

2021-11-06 Thread mindmark

From: Mark Reid 

if the float pixel * 65535.0f > 2147483647.0f
lrintf may overflow and return negative values, depending on implementation.
nan and +/-inf values may also be implementation defined

clamp the values between 0,1 before scaling, so lrintf
always works.

values <=0.0f, -inf, nan = 0.0f
values >=1.0f, +inf  = 1.0f
---
 libswscale/input.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 336f957c8c..ea50c9de5c 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -964,7 +964,7 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t 
*_dstU, uint8_t *_dstV,
 }
 #undef rdpx

-#define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): 
av_int2float(AV_RL32(src)))
+#define rdpx(src) (FFMIN(FFMAX(is_be ? av_int2float(AV_RB32(src)): 
av_int2float(AV_RL32(src)), 0.0f), 1.0f))

 static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t 
*_src[4], int width, int is_be, int32_t *rgb2yuv)
 {
@@ -1013,17 +1013,16 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t 
*_dst, const uint8_t *_s
 }
 }

-#undef rdpx
-
 static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t 
*_src, const uint8_t *unused1,
 const uint8_t *unused2, int width, 
uint32_t *unused)
 {
 int i;
 const float *src = (const float *)_src;
 uint16_t *dst= (uint16_t *)_dst;
+int is_be = 0;

 for (i = 0; i < width; ++i){
-dst[i] = av_clip_uint16(lrintf(65535.0f * src[i]));
+dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i)));
 }
 }

@@ -1033,12 +1032,15 @@ static av_always_inline void 
grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t *
 int i;
 const uint32_t *src = (const uint32_t *)_src;
 uint16_t *dst= (uint16_t *)_dst;
+int is_be = 1;

 for (i = 0; i < width; ++i){
-dst[i] = av_clip_uint16(lrintf(65535.0f * 
av_int2float(av_bswap32(src[i];
+dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src+ i)));
 }
 }

+#undef rdpx
+
 #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian)   
 \
 static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t 
*src[4],  \
   int w, int32_t *rgb2yuv) 
 \
--
2.29.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2] swscale/x86/output.asm: add x86-optimized planer gbr yuv2anyX functions

2021-11-03 Thread mindmark

From: Mark Reid 

changes since v1:
* remove vex instruction on sse4 path
* some load/pack macros use fewer instructions
* fixed some typos

yuv2gbrp_full_X_4_512_c: 12757.6
yuv2gbrp_full_X_4_512_sse2: 8946.6
yuv2gbrp_full_X_4_512_sse4: 5138.6
yuv2gbrp_full_X_4_512_avx2: 3889.6
yuv2gbrap_full_X_4_512_c: 15368.6
yuv2gbrap_full_X_4_512_sse2: 11916.1
yuv2gbrap_full_X_4_512_sse4: 6294.6
yuv2gbrap_full_X_4_512_avx2: 3477.1
yuv2gbrp9be_full_X_4_512_c: 14381.6
yuv2gbrp9be_full_X_4_512_sse2: 9139.1
yuv2gbrp9be_full_X_4_512_sse4: 5150.1
yuv2gbrp9be_full_X_4_512_avx2: 2834.6
yuv2gbrp9le_full_X_4_512_c: 12990.1
yuv2gbrp9le_full_X_4_512_sse2: 9118.1
yuv2gbrp9le_full_X_4_512_sse4: 5132.1
yuv2gbrp9le_full_X_4_512_avx2: 2833.1
yuv2gbrp10be_full_X_4_512_c: 14401.6
yuv2gbrp10be_full_X_4_512_sse2: 9133.1
yuv2gbrp10be_full_X_4_512_sse4: 5126.1
yuv2gbrp10be_full_X_4_512_avx2: 2837.6
yuv2gbrp10le_full_X_4_512_c: 12718.1
yuv2gbrp10le_full_X_4_512_sse2: 9106.1
yuv2gbrp10le_full_X_4_512_sse4: 5120.1
yuv2gbrp10le_full_X_4_512_avx2: 2826.1
yuv2gbrap10be_full_X_4_512_c: 18535.6
yuv2gbrap10be_full_X_4_512_sse2: 33617.6
yuv2gbrap10be_full_X_4_512_sse4: 6264.1
yuv2gbrap10be_full_X_4_512_avx2: 3422.1
yuv2gbrap10le_full_X_4_512_c: 16724.1
yuv2gbrap10le_full_X_4_512_sse2: 11787.1
yuv2gbrap10le_full_X_4_512_sse4: 6282.1
yuv2gbrap10le_full_X_4_512_avx2: 3441.6
yuv2gbrp12be_full_X_4_512_c: 13723.6
yuv2gbrp12be_full_X_4_512_sse2: 9128.1
yuv2gbrp12be_full_X_4_512_sse4: 7997.6
yuv2gbrp12be_full_X_4_512_avx2: 2844.1
yuv2gbrp12le_full_X_4_512_c: 12257.1
yuv2gbrp12le_full_X_4_512_sse2: 9107.6
yuv2gbrp12le_full_X_4_512_sse4: 5142.6
yuv2gbrp12le_full_X_4_512_avx2: 2837.6
yuv2gbrap12be_full_X_4_512_c: 18511.1
yuv2gbrap12be_full_X_4_512_sse2: 12156.6
yuv2gbrap12be_full_X_4_512_sse4: 6251.1
yuv2gbrap12be_full_X_4_512_avx2: 3444.6
yuv2gbrap12le_full_X_4_512_c: 16687.1
yuv2gbrap12le_full_X_4_512_sse2: 11785.1
yuv2gbrap12le_full_X_4_512_sse4: 6243.6
yuv2gbrap12le_full_X_4_512_avx2: 3446.1
yuv2gbrp14be_full_X_4_512_c: 13690.6
yuv2gbrp14be_full_X_4_512_sse2: 9120.6
yuv2gbrp14be_full_X_4_512_sse4: 5138.1
yuv2gbrp14be_full_X_4_512_avx2: 2843.1
yuv2gbrp14le_full_X_4_512_c: 14995.6
yuv2gbrp14le_full_X_4_512_sse2: 9119.1
yuv2gbrp14le_full_X_4_512_sse4: 5126.1
yuv2gbrp14le_full_X_4_512_avx2: 2843.1
yuv2gbrp16be_full_X_4_512_c: 12367.1
yuv2gbrp16be_full_X_4_512_sse2: 8233.6
yuv2gbrp16be_full_X_4_512_sse4: 4820.1
yuv2gbrp16be_full_X_4_512_avx2: 2666.6
yuv2gbrp16le_full_X_4_512_c: 10904.1
yuv2gbrp16le_full_X_4_512_sse2: 8214.1
yuv2gbrp16le_full_X_4_512_sse4: 4824.1
yuv2gbrp16le_full_X_4_512_avx2: 2629.1
yuv2gbrap16be_full_X_4_512_c: 26569.6
yuv2gbrap16be_full_X_4_512_sse2: 10884.1
yuv2gbrap16be_full_X_4_512_sse4: 5488.1
yuv2gbrap16be_full_X_4_512_avx2: 3272.1
yuv2gbrap16le_full_X_4_512_c: 14010.1
yuv2gbrap16le_full_X_4_512_sse2: 10562.1
yuv2gbrap16le_full_X_4_512_sse4: 5463.6
yuv2gbrap16le_full_X_4_512_avx2: 3255.1
yuv2gbrpf32be_full_X_4_512_c: 14524.1
yuv2gbrpf32be_full_X_4_512_sse2: 8552.6
yuv2gbrpf32be_full_X_4_512_sse4: 4636.1
yuv2gbrpf32be_full_X_4_512_avx2: 2474.6
yuv2gbrpf32le_full_X_4_512_c: 13060.6
yuv2gbrpf32le_full_X_4_512_sse2: 9682.6
yuv2gbrpf32le_full_X_4_512_sse4: 4298.1
yuv2gbrpf32le_full_X_4_512_avx2: 2453.1
yuv2gbrapf32be_full_X_4_512_c: 18629.6
yuv2gbrapf32be_full_X_4_512_sse2: 11363.1
yuv2gbrapf32be_full_X_4_512_sse4: 15201.6
yuv2gbrapf32be_full_X_4_512_avx2: 3727.1
yuv2gbrapf32le_full_X_4_512_c: 16677.6
yuv2gbrapf32le_full_X_4_512_sse2: 10221.6
yuv2gbrapf32le_full_X_4_512_sse4: 5693.6
yuv2gbrapf32le_full_X_4_512_avx2: 3656.6

---
 libswscale/x86/output.asm | 435 +-
 libswscale/x86/swscale.c  |  98 +
 tests/checkasm/Makefile   |   2 +-
 tests/checkasm/checkasm.c |   1 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/sw_gbrp.c  | 198 +
 tests/fate/checkasm.mak   |   1 +
 7 files changed, 734 insertions(+), 2 deletions(-)
 create mode 100644 tests/checkasm/sw_gbrp.c

diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 52cf9f2c2e..ce0a2650b4 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -38,7 +38,49 @@ pw_32: times 8 dw 32
 pd_255:times 8 dd 255
 pw_512:times 8 dw 512
 pw_1024:   times 8 dw 1024
-
+pd_65535_invf: times 8 dd 0x37800080 ;1.0/65535.0
+pd_yuv2gbrp16_start:   times 8 dd -0x4000
+pd_yuv2gbrp_y_start:   times 8 dd  (1 << 9)
+pd_yuv2gbrp_uv_start:  times 8 dd  ((1 << 9) - (128 << 19))
+pd_yuv2gbrp_a_start:   times 8 dd  (1 << 18)
+pd_yuv2gbrp16_offset:  times 8 dd  0x1  ;(1 << 16)
+pd_yuv2gbrp16_round13: times 8 dd  0x02000  ;(1 << 13)
+pd_yuv2gbrp16_a_offset:times 8 dd  0x20002000
+pd_yuv2gbrp16_upper30: times 8 dd  0x3FFF ;(1<<30) - 1
+pd_yuv2gbrp16_upper27: times 8 dd  0x07FF ;(1<<27) - 1
+pd_yuv2gbrp16_upperC:  times 8 dd  0xC000
+pb_pack_shuffle8:   db  0,  4,  8, 12, \
+   -1, -1, -1, -1, \
+

[FFmpeg-devel] [PATCH 1/1] swscale/input: fix planar_rgb16_to_a for gbrap10be and gbrap12be formats

2021-11-03 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c  | 2 +-
 tests/ref/fate/filter-pixfmts-scale | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 477dc3d6b2..336f957c8c 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -911,7 +911,7 @@ static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t 
*_dstV, const uint8_t *src[
 }
 
 #define rdpx(src) \
-is_be ? AV_RB16(src) : AV_RL16(src)
+(is_be ? AV_RB16(src) : AV_RL16(src))
 static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t 
*_src[4],
int width, int bpc, int is_be, 
int32_t *rgb2yuv)
 {
diff --git a/tests/ref/fate/filter-pixfmts-scale 
b/tests/ref/fate/filter-pixfmts-scale
index 07c4ff536d..1623e5c939 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -19,9 +19,9 @@ bgrad8316272bc3a360ef9dff3ecc84520a3
 bgra64be4e6a1b9f9c18b881c27d76611d45f737
 bgra64leefeee0abcc658ebcff049d5e74d74943
 gbrap   4a100f750ac846b34bfeef0d6893c3de
-gbrap10be   dc6aea3559ea4fcdda1ccc4f23d2f2fb
+gbrap10be   50735fbc471a5ac5a6645c85881f3670
 gbrap10le   6e1cba57029fdf0f9d46b5e5cd55112b
-gbrap12be   dbe3a662c016563529032cd4dfb80262
+gbrap12be   58170165829484b3db4a3b9165198987
 gbrap12le   24f5ecb32435b73353517e017c165e31
 gbrap16be   31968e6872a46e8174fb57f8920ed10d
 gbrap16le   8c6758f33671b673b6d30969fc05a23d
-- 
2.29.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] swscale/x86/output.asm: add x86-optimized planer gbr yuv2anyX functions

2021-10-24 Thread mindmark

From: Mark Reid 

yuv2gbrp_full_X_4_512_c: 12096.6
yuv2gbrp_full_X_4_512_sse2: 10782.6
yuv2gbrp_full_X_4_512_sse4: 5143.6
yuv2gbrp_full_X_4_512_avx2: 3000.1
yuv2gbrap_full_X_4_512_c: 15463.1
yuv2gbrap_full_X_4_512_sse2: 14296.6
yuv2gbrap_full_X_4_512_sse4: 6319.1
yuv2gbrap_full_X_4_512_avx2: 3554.1
yuv2gbrp9be_full_X_4_512_c: 14281.6
yuv2gbrp9be_full_X_4_512_sse2: 11206.1
yuv2gbrp9be_full_X_4_512_sse4: 5033.6
yuv2gbrp9be_full_X_4_512_avx2: 3012.6
yuv2gbrp9le_full_X_4_512_c: 12688.6
yuv2gbrp9le_full_X_4_512_sse2: 10914.1
yuv2gbrp9le_full_X_4_512_sse4: 5144.6
yuv2gbrp9le_full_X_4_512_avx2: 3014.6
yuv2gbrp10be_full_X_4_512_c: 14257.6
yuv2gbrp10be_full_X_4_512_sse2: 11089.6
yuv2gbrp10be_full_X_4_512_sse4: 5039.1
yuv2gbrp10be_full_X_4_512_avx2: 3001.1
yuv2gbrp10le_full_X_4_512_c: 12098.6
yuv2gbrp10le_full_X_4_512_sse2: 10884.1
yuv2gbrp10le_full_X_4_512_sse4: 5138.1
yuv2gbrp10le_full_X_4_512_avx2: 2999.6
yuv2gbrap10be_full_X_4_512_c: 18549.6
yuv2gbrap10be_full_X_4_512_sse2: 14538.6
yuv2gbrap10be_full_X_4_512_sse4: 6292.6
yuv2gbrap10be_full_X_4_512_avx2: 3583.6
yuv2gbrap10le_full_X_4_512_c: 16631.1
yuv2gbrap10le_full_X_4_512_sse2: 14190.6
yuv2gbrap10le_full_X_4_512_sse4: 6348.1
yuv2gbrap10le_full_X_4_512_avx2: 3554.6
yuv2gbrp12be_full_X_4_512_c: 13555.1
yuv2gbrp12be_full_X_4_512_sse2: 10952.1
yuv2gbrp12be_full_X_4_512_sse4: 5137.6
yuv2gbrp12be_full_X_4_512_avx2: 3009.6
yuv2gbrp12le_full_X_4_512_c: 12082.6
yuv2gbrp12le_full_X_4_512_sse2: 10891.1
yuv2gbrp12le_full_X_4_512_sse4: 5184.1
yuv2gbrp12le_full_X_4_512_avx2: 3011.1
yuv2gbrap12be_full_X_4_512_c: 18689.6
yuv2gbrap12be_full_X_4_512_sse2: 14522.6
yuv2gbrap12be_full_X_4_512_sse4: 6237.6
yuv2gbrap12be_full_X_4_512_avx2: 3585.6
yuv2gbrap12le_full_X_4_512_c: 16760.6
yuv2gbrap12le_full_X_4_512_sse2: 14202.1
yuv2gbrap12le_full_X_4_512_sse4: 6252.1
yuv2gbrap12le_full_X_4_512_avx2: 3591.1
yuv2gbrp14be_full_X_4_512_c: 13555.6
yuv2gbrp14be_full_X_4_512_sse2: 10949.1
yuv2gbrp14be_full_X_4_512_sse4: 5185.1
yuv2gbrp14be_full_X_4_512_avx2: 3012.1
yuv2gbrp14le_full_X_4_512_c: 12068.1
yuv2gbrp14le_full_X_4_512_sse2: 10883.6
yuv2gbrp14le_full_X_4_512_sse4: 5145.1
yuv2gbrp14le_full_X_4_512_avx2: 3007.1
yuv2gbrp16be_full_X_4_512_c: 12383.6
yuv2gbrp16be_full_X_4_512_sse2: 8230.6
yuv2gbrp16be_full_X_4_512_sse4: 4765.6
yuv2gbrp16be_full_X_4_512_avx2: 2742.6
yuv2gbrp16le_full_X_4_512_c: 10906.1
yuv2gbrp16le_full_X_4_512_sse2: 28732.1
yuv2gbrp16le_full_X_4_512_sse4: 4709.6
yuv2gbrp16le_full_X_4_512_avx2: 2753.1
yuv2gbrap16be_full_X_4_512_c: 15472.6
yuv2gbrap16be_full_X_4_512_sse2: 11021.6
yuv2gbrap16be_full_X_4_512_sse4: 5487.6
yuv2gbrap16be_full_X_4_512_avx2: 3143.6
yuv2gbrap16le_full_X_4_512_c: 13668.6
yuv2gbrap16le_full_X_4_512_sse2: 10562.1
yuv2gbrap16le_full_X_4_512_sse4: 5506.6
yuv2gbrap16le_full_X_4_512_avx2: 3149.6
yuv2gbrpf32be_full_X_4_512_c: 15471.1
yuv2gbrpf32be_full_X_4_512_sse2: 8524.6
yuv2gbrpf32be_full_X_4_512_sse4: 4559.1
yuv2gbrpf32be_full_X_4_512_avx2: 2388.1
yuv2gbrpf32le_full_X_4_512_c: 14247.6
yuv2gbrpf32le_full_X_4_512_sse2: 7600.6
yuv2gbrpf32le_full_X_4_512_sse4: 4385.6
yuv2gbrpf32le_full_X_4_512_avx2: 2258.6
yuv2gbrapf32be_full_X_4_512_c: 18412.1
yuv2gbrapf32be_full_X_4_512_sse2: 11353.6
yuv2gbrapf32be_full_X_4_512_sse4: 5807.1
yuv2gbrapf32be_full_X_4_512_avx2: 2928.1
yuv2gbrapf32le_full_X_4_512_c: 16485.1
yuv2gbrapf32le_full_X_4_512_sse2: 10202.1
yuv2gbrapf32le_full_X_4_512_sse4: 5571.6
yuv2gbrapf32le_full_X_4_512_avx2: 2847.6


---
 libswscale/x86/output.asm | 440 +-
 libswscale/x86/swscale.c  |  99 +
 tests/checkasm/Makefile   |   2 +-
 tests/checkasm/checkasm.c |   1 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/sw_gbrp.c  | 198 +
 tests/fate/checkasm.mak   |   1 +
 7 files changed, 740 insertions(+), 2 deletions(-)
 create mode 100644 tests/checkasm/sw_gbrp.c

diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 52cf9f2c2e..e80b6256b4 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -38,7 +38,49 @@ pw_32: times 8 dw 32
 pd_255:times 8 dd 255
 pw_512:times 8 dw 512
 pw_1024:   times 8 dw 1024
-
+pd_65535_invf: times 8 dd 0x37800080 ;1.0/65535.0
+pd_yuv2gbrp16_start:   times 8 dd -0x4000
+pd_yuv2gbrp_y_start:   times 8 dd  (1 << 9)
+pd_yuv2gbrp_uv_start:  times 8 dd  ((1 << 9) - (128 << 19))
+pd_yuv2gbrp_a_start:   times 8 dd  (1 << 18)
+pd_yuv2gbrp16_offset:  times 8 dd  0x1  ;(1 << 16)
+pd_yuv2gbrp16_round13: times 8 dd  0x02000  ;(1 << 13)
+pd_yuv2gbrp16_a_offset:times 8 dd  0x20002000
+pd_yuv2gbrp16_upper30: times 8 dd  0x3FFF ;(1<<30) - 1
+pd_yuv2gbrp16_upper27: times 8 dd  0x07FF ;(1<<27) - 1
+pd_yuv2gbrp16_upperC:  times 8 dd  0xC000
+pb_lo_pack_shuffle8:db  0,  4,  8, 12, \
+   -1, -1, -1, -1, \
+   -1, -1, -1, -1, \
+   -1, -1, -1, -1
+pb_hi_pack_shuffle8:db -1, -1, -1, -1,

[FFmpeg-devel] [PATCH] avfilter/vf_lut3d: fix building with --disable-optimizations

2021-10-10 Thread mindmark

From: Mark Reid 

---
 libavfilter/x86/vf_lut3d_init.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavfilter/x86/vf_lut3d_init.c b/libavfilter/x86/vf_lut3d_init.c
index 3b3dd18680..db1a152f51 100644
--- a/libavfilter/x86/vf_lut3d_init.c
+++ b/libavfilter/x86/vf_lut3d_init.c
@@ -48,9 +48,11 @@ static int interp_##name##_##format##_##opt(AVFilterContext 
*ctx, void *arg, int
 DEFINE_INTERP_FUNC(tetrahedral, pf32, avx)
 DEFINE_INTERP_FUNC(tetrahedral, p16,  avx)
 #endif
+#if HAVE_SSE2_EXTERNAL
 DEFINE_INTERP_FUNC(tetrahedral, pf32, sse2)
 DEFINE_INTERP_FUNC(tetrahedral, p16,  sse2)
 #endif
+#endif
 
 
 av_cold void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc)
@@ -78,11 +80,13 @@ av_cold void ff_lut3d_init_x86(LUT3DContext *s, const 
AVPixFmtDescriptor *desc)
 }
 #endif
 } else if (EXTERNAL_SSE2(cpu_flags) && s->interpolation == 
INTERPOLATE_TETRAHEDRAL && planar) {
+#if HAVE_SSE2_EXTERNAL
 if (isfloat) {
 s->interp = interp_tetrahedral_pf32_sse2;
 } else if (depth == 16) {
 s->interp = interp_tetrahedral_p16_sse2;
 }
+#endif
 }
 #endif
 }
-- 
2.31.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2] avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation

2021-10-05 Thread mindmark

From: Mark Reid 

I spotted an interesting pattern that I didn't see before that leads to the
implementation being faster.
The bit shifting table I was using before is no longer needed, and I was able to
remove quite a few lines.
I also added use of FMA in the AVX2 version.

f32 1920x1080 1 thread with prelut
c impl
1434012700 UNITS in lut3d->interp,       1 runs,      0 skips
1434035335 UNITS in lut3d->interp,       2 runs,      0 skips
1423615347 UNITS in lut3d->interp,       4 runs,      0 skips
1426268863 UNITS in lut3d->interp,       8 runs,      0 skips

sse2
905484420 UNITS in lut3d->interp,       1 runs,      0 skips
905659010 UNITS in lut3d->interp,       2 runs,      0 skips
915167140 UNITS in lut3d->interp,       4 runs,      0 skips
915834222 UNITS in lut3d->interp,       8 runs,      0 skips

avx
574794860 UNITS in lut3d->interp,       1 runs,      0 skips
581035090 UNITS in lut3d->interp,       2 runs,      0 skips
584116720 UNITS in lut3d->interp,       4 runs,      0 skips
581460290 UNITS in lut3d->interp,       8 runs,      0 skips

avx2
301698880 UNITS in lut3d->interp,       1 runs,      0 skips
301982880 UNITS in lut3d->interp,       2 runs,      0 skips
306962430 UNITS in lut3d->interp,       4 runs,      0 skips
305472025 UNITS in lut3d->interp,       8 runs,      0 skips

gbrap16 1920x1080 1 thread with prelut
c impl
1480894840 UNITS in lut3d->interp,       1 runs,      0 skips
1502922990 UNITS in lut3d->interp,       2 runs,      0 skips
1496114307 UNITS in lut3d->interp,       4 runs,      0 skips
1492554551 UNITS in lut3d->interp,       8 runs,      0 skips

sse2
980777180 UNITS in lut3d->interp,       1 runs,      0 skips
986121520 UNITS in lut3d->interp,       2 runs,      0 skips
986489840 UNITS in lut3d->interp,       4 runs,      0 skips
998832248 UNITS in lut3d->interp,       8 runs,      0 skips

avx
622212360 UNITS in lut3d->interp,       1 runs,      0 skips
622981160 UNITS in lut3d->interp,       2 runs,      0 skips
645396315 UNITS in lut3d->interp,       4 runs,      0 skips
641057075 UNITS in lut3d->interp,       8 runs,      0 skips

avx2
321336400 UNITS in lut3d->interp,       1 runs,      0 skips
321268920 UNITS in lut3d->interp,       2 runs,      0 skips
323459895 UNITS in lut3d->interp,       4 runs,      0 skips
324949967 UNITS in lut3d->interp,       8 runs,      0 skips

---
 libavfilter/lut3d.h |  83 
 libavfilter/vf_lut3d.c  |  61 +--
 libavfilter/x86/Makefile|   2 +
 libavfilter/x86/vf_lut3d.asm| 662 
 libavfilter/x86/vf_lut3d_init.c |  88 +
 5 files changed, 840 insertions(+), 56 deletions(-)
 create mode 100644 libavfilter/lut3d.h
 create mode 100644 libavfilter/x86/vf_lut3d.asm
 create mode 100644 libavfilter/x86/vf_lut3d_init.c

diff --git a/libavfilter/lut3d.h b/libavfilter/lut3d.h
new file mode 100644
index 00..ded2a036a5
--- /dev/null
+++ b/libavfilter/lut3d.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2013 Clément Bœsch
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFILTER_LUT3D_H
+#define AVFILTER_LUT3D_H
+
+#include "libavutil/pixdesc.h"
+#include "framesync.h"
+#include "avfilter.h"
+
+enum interp_mode {
+INTERPOLATE_NEAREST,
+INTERPOLATE_TRILINEAR,
+INTERPOLATE_TETRAHEDRAL,
+INTERPOLATE_PYRAMID,
+INTERPOLATE_PRISM,
+NB_INTERP_MODE
+};
+
+struct rgbvec {
+float r, g, b;
+};
+
+/* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT
+ * of 512x512 (64x64x64) */
+#define MAX_LEVEL 256
+#define PRELUT_SIZE 65536
+
+typedef struct Lut3DPreLut {
+int size;
+float min[3];
+float max[3];
+float scale[3];
+float* lut[3];
+} Lut3DPreLut;
+
+typedef struct LUT3DContext {
+const AVClass *class;
+struct rgbvec *lut;
+int lutsize;
+int lutsize2;
+struct rgbvec scale;
+int interpolation;  ///
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in t

[FFmpeg-devel] [PATCH] avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation

2021-09-28 Thread mindmark

From: Mark Reid 

Only supports float and 16-bit planar formats at the moment.
Mainly focused on AVX and AVX2 optimizations, but SSE2 does seem to offer some
speed gains.

f32 1920x1080 1 thread with prelut
c impl
1389936500 UNITS in lut3d->interp,       1 runs,      0 skips
1425800240 UNITS in lut3d->interp,       2 runs,      0 skips
1433312777 UNITS in lut3d->interp,       4 runs,      0 skips
1443346798 UNITS in lut3d->interp,       8 runs,      0 skips

sse2
948662320 UNITS in lut3d->interp,       1 runs,      0 skips
1101247540 UNITS in lut3d->interp,       2 runs,      0 skips
1050645695 UNITS in lut3d->interp,       4 runs,      0 skips
1041102937 UNITS in lut3d->interp,       8 runs,      0 skips

avx
633837000 UNITS in lut3d->interp,       1 runs,      0 skips
669452850 UNITS in lut3d->interp,       2 runs,      0 skips
650716580 UNITS in lut3d->interp,       4 runs,      0 skips
644698550 UNITS in lut3d->interp,       8 runs,      0 skips

avx2
354940020 UNITS in lut3d->interp,       1 runs,      0 skips
362384340 UNITS in lut3d->interp,       2 runs,      0 skips
356799020 UNITS in lut3d->interp,       4 runs,      0 skips
357276815 UNITS in lut3d->interp,       8 runs,      0 skips

gbrap16 1920x1080 1 thread with prelut
c impl
1445071160 UNITS in lut3d->interp,       1 runs,      0 skips
1477959120 UNITS in lut3d->interp,       2 runs,      0 skips
1472102670 UNITS in lut3d->interp,       4 runs,      0 skips
1462579330 UNITS in lut3d->interp,       8 runs,      0 skips

sse2
1035437580 UNITS in lut3d->interp,       1 runs,      0 skips
1050139710 UNITS in lut3d->interp,       2 runs,      0 skips
1070147205 UNITS in lut3d->interp,       4 runs,      0 skips
1064583037 UNITS in lut3d->interp,       8 runs,      0 skips

avx
678089880 UNITS in lut3d->interp,       1 runs,      0 skips
679112485 UNITS in lut3d->interp,       2 runs,      0 skips
695527212 UNITS in lut3d->interp,       4 runs,      0 skips
691300053 UNITS in lut3d->interp,       8 runs,      0 skips

avx2
372671340 UNITS in lut3d->interp,       1 runs,      0 skips
373449870 UNITS in lut3d->interp,       2 runs,      0 skips
383725625 UNITS in lut3d->interp,       4 runs,      0 skips
382860848 UNITS in lut3d->interp,       8 runs,      0 skips

---
 libavfilter/lut3d.h |  83 
 libavfilter/vf_lut3d.c  |  61 +--
 libavfilter/x86/Makefile|   2 +
 libavfilter/x86/vf_lut3d.asm| 757 
 libavfilter/x86/vf_lut3d_init.c |  88 
 5 files changed, 935 insertions(+), 56 deletions(-)
 create mode 100644 libavfilter/lut3d.h
 create mode 100644 libavfilter/x86/vf_lut3d.asm
 create mode 100644 libavfilter/x86/vf_lut3d_init.c

diff --git a/libavfilter/lut3d.h b/libavfilter/lut3d.h
new file mode 100644
index 00..ded2a036a5
--- /dev/null
+++ b/libavfilter/lut3d.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2013 Clément Bœsch
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFILTER_LUT3D_H
+#define AVFILTER_LUT3D_H
+
+#include "libavutil/pixdesc.h"
+#include "framesync.h"
+#include "avfilter.h"
+
+enum interp_mode {
+INTERPOLATE_NEAREST,
+INTERPOLATE_TRILINEAR,
+INTERPOLATE_TETRAHEDRAL,
+INTERPOLATE_PYRAMID,
+INTERPOLATE_PRISM,
+NB_INTERP_MODE
+};
+
+struct rgbvec {
+float r, g, b;
+};
+
+/* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT
+ * of 512x512 (64x64x64) */
+#define MAX_LEVEL 256
+#define PRELUT_SIZE 65536
+
+typedef struct Lut3DPreLut {
+int size;
+float min[3];
+float max[3];
+float scale[3];
+float* lut[3];
+} Lut3DPreLut;
+
+typedef struct LUT3DContext {
+const AVClass *class;
+struct rgbvec *lut;
+int lutsize;
+int lutsize2;
+struct rgbvec scale;
+int interpolation;  ///
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of

[FFmpeg-devel] [PATCH v2 2/2] libavdevice/avfoundation: add option to set audio sample rate and use native device formats

2021-04-11 Thread mindmark

From: Mark Reid 

This also seems to prevent the audio format changing after format has been 
identified. 
This can happen in ffplay and might have something to do with sdl configuring 
the audio devices.

---
 libavdevice/avfoundation.m | 123 -
 1 file changed, 94 insertions(+), 29 deletions(-)

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 5ac6ec4183..70226cfdc8 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -118,11 +118,8 @@ typedef struct
 
 int audio_channels;
 int audio_bits_per_sample;
-int audio_float;
-int audio_be;
-int audio_signed_integer;
-int audio_packed;
 int audio_non_interleaved;
+int audio_sample_rate;
 
 int32_t *audio_buffer;
 int audio_buffer_size;
@@ -632,12 +629,47 @@ static int add_video_device(AVFormatContext *s, 
AVCaptureDevice *video_device)
 return 0;
 }
 
+static enum AVCodecID find_audio_codec_id(const AudioStreamBasicDescription 
*basic_desc)
+{
+int audio_float   = basic_desc->mFormatFlags & 
kAudioFormatFlagIsFloat;
+int audio_signed_integer  = basic_desc->mFormatFlags & 
kAudioFormatFlagIsSignedInteger;
+int audio_be  = basic_desc->mFormatFlags & 
kAudioFormatFlagIsBigEndian;
+int audio_packed  = basic_desc->mFormatFlags & 
kAudioFormatFlagIsPacked;
+int audio_bits_per_sample = basic_desc->mBitsPerChannel;
+
+if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
+audio_float &&
+audio_bits_per_sample == 32 &&
+audio_packed) {
+return audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
+} else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
+audio_signed_integer &&
+audio_bits_per_sample == 16 &&
+audio_packed) {
+return audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
+} else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
+audio_signed_integer &&
+audio_bits_per_sample == 24 &&
+audio_packed) {
+return audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
+} else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
+audio_signed_integer &&
+audio_bits_per_sample == 32 &&
+audio_packed) {
+return audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+} else {
+return AV_CODEC_ID_NONE;
+}
+}
+
 static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
 {
 AVFContext *ctx = (AVFContext*)s->priv_data;
 NSError *error  = nil;
 AVCaptureDeviceInput* audio_dev_input = [[[AVCaptureDeviceInput alloc] 
initWithDevice:audio_device error:&error] autorelease];
 dispatch_queue_t queue;
+NSObject *format = nil;
+const AudioStreamBasicDescription *format_desc = NULL;
 
 if (!audio_dev_input) {
 av_log(s, AV_LOG_ERROR, "Failed to create AV capture input device: 
%s\n",
@@ -660,6 +692,61 @@ static int add_audio_device(AVFormatContext *s, 
AVCaptureDevice *audio_device)
 return 1;
 }
 
+#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
+
+for (format in [audio_device valueForKey:@"formats"]) {
+CMFormatDescriptionRef formatDescription;
+formatDescription = (CMFormatDescriptionRef) [format 
performSelector:@selector(formatDescription)];
+const AudioStreamBasicDescription *desc = 
CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription);
+
+if (desc->mSampleRate == ctx->audio_sample_rate) {
+format_desc = desc;
+break;
+}
+}
+
+if(!format_desc) {
+av_log(s, AV_LOG_ERROR, "Selected audio sample rate (%d Hz) is not 
supported\n", ctx->audio_sample_rate);
+av_log(s, AV_LOG_ERROR, "Supported audio formats:\n");
+for (format in [audio_device valueForKey:@"formats"]) {
+const char *codec_name;
+CMFormatDescriptionRef formatDescription;
+formatDescription = (CMFormatDescriptionRef) [format 
performSelector:@selector(formatDescription)];
+const AudioStreamBasicDescription *desc = 
CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription);
+
+enum AVCodecID codec_id = find_audio_codec_id(desc);
+if (codec_id == AV_CODEC_ID_NONE) {
+continue;
+}
+
+codec_name =  avcodec_get_name(codec_id);
+av_log(s, AV_LOG_ERROR, "  %s, %d ch, %0.0f Hz \n", codec_name, 
desc->mChannelsPerFrame, desc->mSampleRate);
+}
+
+format_desc = 
CMAudioFormatDescriptionGetStreamBasicDescription(audio_device.activeFormat.formatDescription);
+if (format_desc)
+av_log(s, AV_LOG_WARNING, "Overriding selected sample rate with 
active sample rate: %0.0f Hz instead\n", format_desc->mSa

[FFmpeg-devel] [PATCH v2 1/2] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive

2021-04-11 Thread mindmark

From: Mark Reid 

This fixes audio issues I've had with some capture devices. The audio
gets really choppy and stops working. This seems to be because avf_read_packet
stops outputting the audio frames because a video frame happens to be available 
first.

It is based on the approach used in a patch from #4437
https://trac.ffmpeg.org/ticket/4437

My approach uses an AVFifoBuffer instead of NSMutableArray and also
outputs the packets in the same order they arrive from AVFoundation.

should fix ticket #4437 and #4513
---
 libavdevice/avfoundation.m | 160 -
 1 file changed, 124 insertions(+), 36 deletions(-)

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 59d5b0af4f..5ac6ec4183 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -31,13 +31,17 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
 #include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
 #include "libavformat/internal.h"
 #include "libavutil/internal.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/time.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/fifo.h"
 #include "avdevice.h"
 
+#define FIFO_SIZE 4
+
 static const int avf_time_base = 100;
 
 static const AVRational avf_time_base_q = {
@@ -128,8 +132,8 @@ typedef struct
 AVCaptureSession *capture_session;
 AVCaptureVideoDataOutput *video_output;
 AVCaptureAudioDataOutput *audio_output;
-CMSampleBufferRef current_frame;
-CMSampleBufferRef current_audio_frame;
+AVFifoBuffer *video_fifo;
+AVFifoBuffer *audio_fifo;
 
 AVCaptureDevice  *observed_device;
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
@@ -138,6 +142,11 @@ typedef struct
 int  observed_quit;
 } AVFContext;
 
+typedef struct {
+int64_t ts;
+CMSampleBufferRef frame;
+} BufferRef;
+
 static void lock_frames(AVFContext* ctx)
 {
 pthread_mutex_lock(&ctx->frame_lock);
@@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
 pthread_mutex_unlock(&ctx->frame_lock);
 }
 
+static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef 
frame)
+{
+BufferRef buf = {
+.ts= ts,
+.frame = frame,
+};
+
+CFRetain(frame);
+av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
+}
+
+static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
+{
+if (av_fifo_size(f)) {
+av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
+return;
+}
+buf->frame = nil;
+return;
+}
+
+static inline void fifo_drain(AVFifoBuffer* f, int release)
+{
+av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
+if (release) {
+BufferRef buf;
+fifo_peek(f, &buf);
+CFRelease(buf.frame);
+}
+av_fifo_drain(f, sizeof(BufferRef));
+}
+
+static inline void fifo_freep(AVFifoBuffer **f)
+{
+if (f) {
+while (av_fifo_size(*f)) {
+fifo_drain(*f, 1);
+}
+av_fifo_freep(f);
+}
+}
+
 /** FrameReciever class - delegate for AVCaptureSession
  */
 @interface AVFFrameReceiver : NSObject
@@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
  fromConnection:(AVCaptureConnection *)connection
 {
+AVFifoBuffer *fifo = _context->video_fifo;
+int64_t ts = av_gettime_relative();
 lock_frames(_context);
 
-if (_context->current_frame != nil) {
-CFRelease(_context->current_frame);
+if (av_fifo_space(fifo) == 0) {
+av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame 
has been dropped\n");
+fifo_drain(fifo, 1);
 }
 
-_context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
+fifo_write(fifo, ts, videoFrame);
 
 unlock_frames(_context);
 
@@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
  fromConnection:(AVCaptureConnection *)connection
 {
+AVFifoBuffer *fifo = _context->audio_fifo;
+int64_t ts = av_gettime_relative();
 lock_frames(_context);
 
-if (_context->current_audio_frame != nil) {
-CFRelease(_context->current_audio_frame);
+if (!av_fifo_space(fifo)) {
+av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame 
has been dropped\n");
+fifo_drain(fifo, 1);
 }
 
-_context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
+fifo_write(fifo, ts, audioFrame);
 
 unlock_frames(_context);
 
@@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
 ctx->avf_audio_delegate = NULL;
 
 av_freep(&ctx->audio_buffer);
+fifo_freep(&ctx->video_fifo);
+fifo_freep(&ctx->audio_fifo);
 
 pthread_mutex_destroy(&ctx->frame_lock);
-
-if (ctx->current_frame) {
-CFRelease(ctx->current_frame);
-}
 }
 
 static void parse_device_name(AVFormatContext *s)

[FFmpeg-devel] [PATCH] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive

2021-02-13 Thread mindmark

From: Mark Reid 

Hi,
This patch fixes audio issues I've had with some capture devices. The audio
gets really choppy and stops working. This seems to be because avf_read_packet
stops outputting the audio frames because a video frame happens to be available 
first.

It is based on the approach used in a patch from #4437
https://trac.ffmpeg.org/ticket/4437

My approach uses an AVFifoBuffer instead of NSMutableArray and also
outputs the packets in the same order they arrive from AVFoundation.

should fix ticket #4437 and #4513


---
 libavdevice/avfoundation.m | 160 -
 1 file changed, 124 insertions(+), 36 deletions(-)

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 59d5b0af4f..5ac6ec4183 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -31,13 +31,17 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
 #include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
 #include "libavformat/internal.h"
 #include "libavutil/internal.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/time.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/fifo.h"
 #include "avdevice.h"
 
+#define FIFO_SIZE 4
+
 static const int avf_time_base = 100;
 
 static const AVRational avf_time_base_q = {
@@ -128,8 +132,8 @@ typedef struct
 AVCaptureSession *capture_session;
 AVCaptureVideoDataOutput *video_output;
 AVCaptureAudioDataOutput *audio_output;
-CMSampleBufferRef current_frame;
-CMSampleBufferRef current_audio_frame;
+AVFifoBuffer *video_fifo;
+AVFifoBuffer *audio_fifo;
 
 AVCaptureDevice  *observed_device;
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
@@ -138,6 +142,11 @@ typedef struct
 int  observed_quit;
 } AVFContext;
 
+typedef struct {
+int64_t ts;
+CMSampleBufferRef frame;
+} BufferRef;
+
 static void lock_frames(AVFContext* ctx)
 {
 pthread_mutex_lock(&ctx->frame_lock);
@@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
 pthread_mutex_unlock(&ctx->frame_lock);
 }
 
+static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef 
frame)
+{
+BufferRef buf = {
+.ts= ts,
+.frame = frame,
+};
+
+CFRetain(frame);
+av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
+}
+
+static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
+{
+if (av_fifo_size(f)) {
+av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
+return;
+}
+buf->frame = nil;
+return;
+}
+
+static inline void fifo_drain(AVFifoBuffer* f, int release)
+{
+av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
+if (release) {
+BufferRef buf;
+fifo_peek(f, &buf);
+CFRelease(buf.frame);
+}
+av_fifo_drain(f, sizeof(BufferRef));
+}
+
+static inline void fifo_freep(AVFifoBuffer **f)
+{
+if (f) {
+while (av_fifo_size(*f)) {
+fifo_drain(*f, 1);
+}
+av_fifo_freep(f);
+}
+}
+
 /** FrameReciever class - delegate for AVCaptureSession
  */
 @interface AVFFrameReceiver : NSObject
@@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
  fromConnection:(AVCaptureConnection *)connection
 {
+AVFifoBuffer *fifo = _context->video_fifo;
+int64_t ts = av_gettime_relative();
 lock_frames(_context);
 
-if (_context->current_frame != nil) {
-CFRelease(_context->current_frame);
+if (av_fifo_space(fifo) == 0) {
+av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame 
has been dropped\n");
+fifo_drain(fifo, 1);
 }
 
-_context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
+fifo_write(fifo, ts, videoFrame);
 
 unlock_frames(_context);
 
@@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
  fromConnection:(AVCaptureConnection *)connection
 {
+AVFifoBuffer *fifo = _context->audio_fifo;
+int64_t ts = av_gettime_relative();
 lock_frames(_context);
 
-if (_context->current_audio_frame != nil) {
-CFRelease(_context->current_audio_frame);
+if (!av_fifo_space(fifo)) {
+av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame 
has been dropped\n");
+fifo_drain(fifo, 1);
 }
 
-_context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
+fifo_write(fifo, ts, audioFrame);
 
 unlock_frames(_context);
 
@@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
 ctx->avf_audio_delegate = NULL;
 
 av_freep(&ctx->audio_buffer);
+fifo_freep(&ctx->video_fifo);
+fifo_freep(&ctx->audio_fifo);
 
 pthread_mutex_destroy(&ctx->frame_lock);
-
-if (ctx->current_frame) {
-CFRelease(ctx->current_frame);
-}
 }
 
 static void parse_device_name(AVForma

[FFmpeg-devel] [PATCH v2] avformat/mov: fix timecode with counter mode flag set

2021-01-16 Thread mindmark

From: Mark Reid 

The current behaviour ends up squaring the avg_frame_rate if the counter mode
flag is set.
This messes up the timecode calculation, and looks to me like a regression that
seems to have been introduced in 428b4aac.

Upon further testing it seems that no special case is needed for having the
counter flag set.
av_timecode_init appears to handle the timecode correctly, at least in the
sample files
I have.

Here is a sample mov file with the counter flag set
https://www.dropbox.com/s/5l4fucb9lhq523s/timecode_counter_mode.mov

before the patch ffmpeg will report the timecode as:
00:37:11:97 and warns that the timecode framerate is 57600/1002001

after patch:
14:50:55:02

---
 libavformat/mov.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 3215b53636..f8856a43dd 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2350,19 +2350,6 @@ FF_DISABLE_DEPRECATION_WARNINGS
 st->codec->time_base = av_inv_q(st->avg_frame_rate);
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
-/* adjust for per frame dur in counter mode */
-if (tmcd_ctx->tmcd_flags & 0x0008) {
-int timescale = AV_RB32(st->codecpar->extradata + 8);
-int framedur = AV_RB32(st->codecpar->extradata + 12);
-st->avg_frame_rate.num *= timescale;
-st->avg_frame_rate.den *= framedur;
-#if FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-st->codec->time_base.den *= timescale;
-st->codec->time_base.num *= framedur;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-}
 if (size > 30) {
 uint32_t len = AV_RB32(st->codecpar->extradata + 18); /* name 
atom length */
 uint32_t format = AV_RB32(st->codecpar->extradata + 22);
-- 
2.21.1 (Apple Git-122.3)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/1] avformat/mov: fix timecode with counter mode flag set

2020-11-28 Thread mindmark

From: Mark Reid 

The current behaviour ends up squaring the avg_frame_rate if the counter mode
flag is set.
This messes up the timecode calculation, and looks to me like a regression that
seems to have been introduced in 428b4aac.

The new behaviour is to use the "Number of frames" field for avg_frame_rate from
the timecode atom as described here:

https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html#//apple_ref/doc/uid/TP4939-CH205-69831

Number of frames
An 8-bit integer that contains the number of frames per second for the 
timecode format. 
If the time is a counter, this is the number of frames for each counter 
tick.

Here is a sample mov file with the counter flag set
https://www.dropbox.com/s/5l4fucb9lhq523s/timecode_counter_mode.mov

before the patch ffmpeg will report the timecode as:
00:37:11:97 and warns that the timecode framerate is 57600/1002001

after patch:
14:50:55:02

---
 libavformat/mov.c | 17 +
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 2b90e31170..76c1ceb82a 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2336,24 +2336,17 @@ static int mov_parse_stsd_data(MOVContext *c, 
AVIOContext *pb,
 tmcd_ctx->tmcd_flags = val;
 st->avg_frame_rate.num = AV_RB32(st->codecpar->extradata + 8); /* 
timescale */
 st->avg_frame_rate.den = AV_RB32(st->codecpar->extradata + 12); /* 
frameDuration */
-#if FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-st->codec->time_base = av_inv_q(st->avg_frame_rate);
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
+
 /* adjust for per frame dur in counter mode */
 if (tmcd_ctx->tmcd_flags & 0x0008) {
-int timescale = AV_RB32(st->codecpar->extradata + 8);
-int framedur = AV_RB32(st->codecpar->extradata + 12);
-st->avg_frame_rate.num *= timescale;
-st->avg_frame_rate.den *= framedur;
+st->avg_frame_rate.num = st->codecpar->extradata[16] /* fps, 
frames per counter tick in counter mode */;
+st->avg_frame_rate.den = 1;
+}
 #if FF_API_LAVF_AVCTX
 FF_DISABLE_DEPRECATION_WARNINGS
-st->codec->time_base.den *= timescale;
-st->codec->time_base.num *= framedur;
+st->codec->time_base = av_inv_q(st->avg_frame_rate);
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
-}
 if (size > 30) {
 uint32_t len = AV_RB32(st->codecpar->extradata + 18); /* name 
atom length */
 uint32_t format = AV_RB32(st->codecpar->extradata + 22);
-- 
2.29.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] avcodec/exr: preserve half-float NaN bits and add fate test

2020-11-22 Thread mindmark

From: Mark Reid 

Hi,
This patch handles NaNs more like the official implementation handles them,
preserving
the original bits.

https://github.com/AcademySoftwareFoundation/openexr/blob/RB-2.5/IlmBase/Half/toFloat.cpp#L111

It also adds a fate test that is a 256x256 exr containing all possible 16bit 
half-float values. 

Here is a link to download the fate test file, if someone could add it to fate
for me
https://www.dropbox.com/s/2q4jg8w489aunsf/rgb_scanline_zip_half_float_0x0_to_0x.exr

---
 libavcodec/exr.c| 3 +--
 tests/fate/image.mak| 2 ++
 tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x | 6 ++
 3 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index d233dd43fb..6e6ce4275c 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -193,8 +193,7 @@ static union av_intfloat32 exr_half2float(uint16_t hf)
 // half-float NaNs will be converted to a single precision NaN
 // half-float Infs will be converted to a single precision Inf
 exp = FLOAT_MAX_BIASED_EXP;
-if (mantissa)
-mantissa = (1 << 23) - 1;// set all bits to indicate a NaN
+mantissa <<= 13; // preserve half-float NaN bits if set
 } else if (exp == 0x0) {
 // convert half-float zero/denorm to single precision value
 if (mantissa) {
diff --git a/tests/fate/image.mak b/tests/fate/image.mak
index 22072a62f1..c453f0f79c 100644
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak
@@ -317,6 +317,8 @@ fate-exr-rgb-scanline-half-zip-dw-outside: CMD = framecrc 
-i $(TARGET_SAMPLES)/e
 FATE_EXR += fate-exr-rgb-tile-half-zip-dw-outside
 fate-exr-rgb-tile-half-zip-dw-outside: CMD = framecrc -i 
$(TARGET_SAMPLES)/exr/rgb_tile_half_zip_dw_outside.exr -pix_fmt gbrpf32le
 
+FATE_EXR += fate-exr-rgb-scanline-zip-half-0x0-0x
+fate-exr-rgb-scanline-zip-half-0x0-0x: CMD = framecrc -i 
$(TARGET_SAMPLES)/exr/rgb_scanline_zip_half_float_0x0_to_0x.exr -pix_fmt 
gbrpf32le
 
 FATE_EXR-$(call DEMDEC, IMAGE2, EXR) += $(FATE_EXR)
 
diff --git a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x 
b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x
new file mode 100644
index 00..b6201116fe
--- /dev/null
+++ b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0x
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 256x256
+#sar 0: 1/1
+0,  0,  0,1,   786432, 0x1445e411
-- 
2.29.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/1] avcodec/exr: use lookuptable for alpha if there is no trc_func

2020-11-08 Thread mindmark

From: Mark Reid 

---
 libavcodec/exr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index cf7824402a..e907c5c464 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -1203,7 +1203,7 @@ static int decode_block(AVCodecContext *avctx, void 
*tdata,
 }
 } else if (s->pixel_type == EXR_HALF) {
 // 16-bit
-if (c < 3) {
+if (c < 3 || !trc_func) {
 for (x = 0; x < xsize; x++) {
 *ptr_x++ = 
s->gamma_table[bytestream_get_le16(&src)];
 }
-- 
2.27.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/1] avfilter/vf_lut3d: fix sanitizef INF handling

2020-11-08 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_lut3d.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 988f6c8b55..172d6df0c8 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -107,7 +107,7 @@ typedef struct ThreadData {
 
 #define EXPONENT_MASK 0x7F80
 #define MANTISSA_MASK 0x007F
-#define SIGN_MASK 0x7FFF
+#define SIGN_MASK 0x8000
 
 static inline float sanitizef(float f)
 {
@@ -120,7 +120,7 @@ static inline float sanitizef(float f)
 return 0.0f;
 } else if (t.i & SIGN_MASK) {
 // -INF
-return FLT_MIN;
+return -FLT_MAX;
 } else {
 // +INF
 return FLT_MAX;
-- 
2.27.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] libswcale/input: use more accurate planer rgb16 yuv conversions

2020-10-03 Thread mindmark

From: Mark Reid 

These conversions appear to be exhibiting the same rounding error as the rgbf32
formats were.
I separated the rounding value from the 16 and 128 offsets, I think it makes it
a little more clear.

---
 libswscale/input.c |  6 ++--
 tests/ref/fate/filter-pixfmts-scale| 32 +++---
 tests/ref/fate/psd-rgb48   |  2 +-
 tests/ref/fate/psd-rgba64  |  2 +-
 tests/ref/fate/sws-floatimg-cmp| 32 +++---
 tests/ref/vsynth/vsynth1-ffv1-v3-rgb48 |  4 +--
 tests/ref/vsynth/vsynth1-r210  |  4 +--
 tests/ref/vsynth/vsynth2-ffv1-v3-rgb48 |  4 +--
 tests/ref/vsynth/vsynth2-r210  |  4 +--
 tests/ref/vsynth/vsynth3-ffv1-v3-rgb48 |  4 +--
 tests/ref/vsynth/vsynth3-r210  |  4 +--
 tests/ref/vsynth/vsynth_lena-ffv1-v3-rgb48 |  4 +--
 tests/ref/vsynth/vsynth_lena-r210  |  4 +--
 13 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 67a85b0418..6850801a44 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -924,7 +924,7 @@ static av_always_inline void planar_rgb16_to_y(uint8_t 
*_dst, const uint8_t *_sr
 int b = rdpx(src[1] + i);
 int r = rdpx(src[2] + i);
 
-dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14));
+dst[i] = (ry*r + gy*g + by*b + (16 << (RGB2YUV_SHIFT + bpc - 8)) + (1 
<< (RGB2YUV_SHIFT + shift - 15))) >> (RGB2YUV_SHIFT + shift - 14);
 }
 }
 
@@ -957,8 +957,8 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t 
*_dstU, uint8_t *_dstV,
 int b = rdpx(src[1] + i);
 int r = rdpx(src[2] + i);
 
-dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14);
-dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14);
+dstU[i] = (ru*r + gu*g + bu*b + (128 << (RGB2YUV_SHIFT + bpc - 8)) + 
(1 << (RGB2YUV_SHIFT + shift - 15))) >> (RGB2YUV_SHIFT + shift - 14);
+dstV[i] = (rv*r + gv*g + bv*b + (128 << (RGB2YUV_SHIFT + bpc - 8)) + 
(1 << (RGB2YUV_SHIFT + shift - 15))) >> (RGB2YUV_SHIFT + shift - 14);
 }
 }
 #undef rdpx
diff --git a/tests/ref/fate/filter-pixfmts-scale 
b/tests/ref/fate/filter-pixfmts-scale
index 30e7cd5b06..f47c9b887f 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -19,25 +19,25 @@ bgrad8316272bc3a360ef9dff3ecc84520a3
 bgra64be4e6a1b9f9c18b881c27d76611d45f737
 bgra64leefeee0abcc658ebcff049d5e74d74943
 gbrap   4a100f750ac846b34bfeef0d6893c3de
-gbrap10be   6d89abb9248006c3e9017545e9474654
-gbrap10le   cf974e23f485a10740f5de74a5c8c3df
-gbrap12be   1d9b57766ba9c2192403f43967cb9af0
-gbrap12le   bb1ba1c157717db3dd612a76d38a018e
-gbrap16be   c72b935a6e57a8e1c37bff08c2db55b1
-gbrap16le   13eb0e62b1ac9c1c86c81521eaefab5f
+gbrap10be   dc6aea3559ea4fcdda1ccc4f23d2f2fb
+gbrap10le   6e1cba57029fdf0f9d46b5e5cd55112b
+gbrap12be   dbe3a662c016563529032cd4dfb80262
+gbrap12le   24f5ecb32435b73353517e017c165e31
+gbrap16be   31968e6872a46e8174fb57f8920ed10d
+gbrap16le   8c6758f33671b673b6d30969fc05a23d
 gbrapf32be  366b804d5697276e8c481c4bdf05a00b
 gbrapf32le  558a268e6d6b907449d1056afab78f29
 gbrpdc3387f925f972c61aae7eb23cdc19f0
-gbrp10be0277d4c3a8498d75e2783fb81379e481
-gbrp10lef3d70f8ab845c3c9b8f7452e4a6e285a
-gbrp12befbd4e149c452c351c6d1c11d6b6e176a
-gbrp12lec51d51c3b753d735eb22983397262c88
-gbrp14becd20808592e62cc439786c18a14b3e70
-gbrp14le456f7d1ff5990aa6379137d84dd63862
-gbrp16be5fc826cfabebfc1442cb793c4b6303e2
-gbrp16le1b3e0b63d47a3e1b6b20931316883bf2
-gbrp9be d9c88968001e1452ff31fbc8d16b18a0
-gbrp9le 2ccfed0816bf6bd4bb3a5b7591d9603a
+gbrp10bea318ea42e53a7b80a55aa7c19c9a0ab5
+gbrp10le994e8fc6a1e5b230f4c55893fd7618d6
+gbrp12bebfbd419dd18c0a5677d4bce55ab24e2e
+gbrp12le95af1f8495b2a7a7ad67802e3e8bca01
+gbrp14be9d8113c9a5182c02dbe4576509f59a96
+gbrp14le952f39881e500ed684c8b216185f4b80
+gbrp16be5241eee3465096efa111b86b30c3
+gbrp16le5b8b997378ce31207f37059dbfb40c4a
+gbrp9be d7caf58cc3a74a036e11f924f03fc04c
+gbrp9le 010f7bcd8b2e17065d01a09f0d483218
 gbrpf32be   f3d0cefdf11c861001880772d817aac8
 gbrpf32le   290468205c1c18a0667edfca45061aee
 gray221201cc7cfc4964eacd8b3e426fd276
diff --git a/tests/ref/fate/psd-rgb48 b/tests/ref/fate/psd-rgb48
index f60cbab078..139b3c1df3 100644
--- a/tests/ref/fate/psd-rgb48
+++ b/tests/ref/fate/psd-rgb48
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #

[FFmpeg-devel] [PATCH v2 2/2] libswcale/input: use more accurate rgbf32 yuv conversions

2020-09-28 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c  |  12 ++-
 tests/ref/fate/filter-pixfmts-scale |   8 +-
 tests/ref/fate/sws-floatimg-cmp | 122 ++--
 3 files changed, 70 insertions(+), 72 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 064ed5902f..67a85b0418 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -984,15 +984,14 @@ static av_always_inline void planar_rgbf32_to_uv(uint8_t 
*_dstU, uint8_t *_dstV,
 uint16_t *dstV   = (uint16_t *)_dstV;
 int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
 int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
-int bpc = 16;
-int shift = 14;
+
 for (i = 0; i < width; i++) {
 int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
 int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
 int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
 
-dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14);
-dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14);
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> 
RGB2YUV_SHIFT;
 }
 }
 
@@ -1003,14 +1002,13 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t 
*_dst, const uint8_t *_s
 uint16_t *dst= (uint16_t *)_dst;
 
 int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
-int bpc = 16;
-int shift = 14;
+
 for (i = 0; i < width; i++) {
 int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
 int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
 int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
 
-dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14));
+dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> 
RGB2YUV_SHIFT;
 }
 }
 
diff --git a/tests/ref/fate/filter-pixfmts-scale 
b/tests/ref/fate/filter-pixfmts-scale
index d7020ad2c3..30e7cd5b06 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -25,8 +25,8 @@ gbrap12be   1d9b57766ba9c2192403f43967cb9af0
 gbrap12le   bb1ba1c157717db3dd612a76d38a018e
 gbrap16be   c72b935a6e57a8e1c37bff08c2db55b1
 gbrap16le   13eb0e62b1ac9c1c86c81521eaefab5f
-gbrapf32be  42e53d9edccbd9e09c4cd78780ba92f3
-gbrapf32le  eebf3973ef94c841f0a1ceb1ed61621d
+gbrapf32be  366b804d5697276e8c481c4bdf05a00b
+gbrapf32le  558a268e6d6b907449d1056afab78f29
 gbrpdc3387f925f972c61aae7eb23cdc19f0
 gbrp10be0277d4c3a8498d75e2783fb81379e481
 gbrp10lef3d70f8ab845c3c9b8f7452e4a6e285a
@@ -38,8 +38,8 @@ gbrp16be5fc826cfabebfc1442cb793c4b6303e2
 gbrp16le1b3e0b63d47a3e1b6b20931316883bf2
 gbrp9be d9c88968001e1452ff31fbc8d16b18a0
 gbrp9le 2ccfed0816bf6bd4bb3a5b7591d9603a
-gbrpf32be   4614d32e4417f80e0adcc1bdcf6cde42
-gbrpf32le   1366ee77e5559672260bbe51040e28b2
+gbrpf32be   f3d0cefdf11c861001880772d817aac8
+gbrpf32le   290468205c1c18a0667edfca45061aee
 gray221201cc7cfc4964eacd8b3e426fd276
 gray10be9452756d0b37f4f5c7cae7635e22d747
 gray10le37fd2e1ec6b66410212d39a342e864df
diff --git a/tests/ref/fate/sws-floatimg-cmp b/tests/ref/fate/sws-floatimg-cmp
index 24204254c4..cf6788fc23 100644
--- a/tests/ref/fate/sws-floatimg-cmp
+++ b/tests/ref/fate/sws-floatimg-cmp
@@ -1,120 +1,120 @@
 gbrpf32le -> yuv444p16le -> gbrpf32le
-avg diff: 0.003852
+avg diff: 0.000125
 min diff: 0.00
-max diff: 0.006638
+max diff: 0.000501
 gbrpf32le -> yuv444p -> gbrpf32le
-avg diff: 0.004316
+avg diff: 0.001804
 min diff: 0.00
-max diff: 0.012704
+max diff: 0.006399
 gbrpf32le -> yuv444p9le -> gbrpf32le
-avg diff: 0.004053
-min diff: 0.01
-max diff: 0.009402
+avg diff: 0.000906
+min diff: 0.00
+max diff: 0.003313
 gbrpf32le -> yuv444p10le -> gbrpf32le
-avg diff: 0.003960
+avg diff: 0.000467
 min diff: 0.00
-max diff: 0.008123
+max diff: 0.001912
 gbrpf32le -> yuv444p12le -> gbrpf32le
-avg diff: 0.003878
+avg diff: 0.000166
 min diff: 0.00
-max diff: 0.007011
+max diff: 0.000802
 gbrpf32le -> yuv444p14le -> gbrpf32le
-avg diff: 0.003868
+avg diff: 0.000127
 min diff: 0.00
-max diff: 0.006729
+max diff: 0.000524
 gbrpf32le -> rgb24 -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.00
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> bgr24 -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.00
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> rgba -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.00
-max diff: 0.008975

[FFmpeg-devel] [PATCH v2 1/2] libswscale/tests: add floatimg_cmp test

2020-09-28 Thread mindmark

From: Mark Reid 

changes since v1:
- made into fate test
- fixed c90 warnings
- tests more intermediate formats 
- tested on BE mips too

---
 libswscale/Makefile |   1 +
 libswscale/tests/.gitignore |   1 +
 libswscale/tests/floatimg_cmp.c | 296 
 tests/fate/libswscale.mak   |   4 +
 tests/ref/fate/sws-floatimg-cmp | 120 +
 5 files changed, 422 insertions(+)
 create mode 100644 libswscale/tests/floatimg_cmp.c
 create mode 100644 tests/ref/fate/sws-floatimg-cmp

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 5e03e6fa0a..4b8f9de425 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -25,5 +25,6 @@ OBJS-$(CONFIG_SHARED)+= log2_tab.o
 SLIBOBJS-$(HAVE_GNU_WINDRES) += swscaleres.o
 
 TESTPROGS = colorspace  \
+floatimg_cmp\
 pixdesc_query   \
 swscale \
diff --git a/libswscale/tests/.gitignore b/libswscale/tests/.gitignore
index 1a26f038c4..c56abf0ee7 100644
--- a/libswscale/tests/.gitignore
+++ b/libswscale/tests/.gitignore
@@ -1,3 +1,4 @@
 /colorspace
+/floatimg_cmp
 /pixdesc_query
 /swscale
diff --git a/libswscale/tests/floatimg_cmp.c b/libswscale/tests/floatimg_cmp.c
new file mode 100644
index 00..5c67594fb6
--- /dev/null
+++ b/libswscale/tests/floatimg_cmp.c
@@ -0,0 +1,296 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "libavutil/avutil.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/intfloat.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/lfg.h"
+#include "libavutil/mem.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+
+#include "libswscale/swscale.h"
+
+#define DEFAULT_W 96
+#define DEFAULT_H 96
+
+static const enum AVPixelFormat pix_fmts[] = {
+AV_PIX_FMT_YUV444P16LE,
+AV_PIX_FMT_YUV444P,
+AV_PIX_FMT_YUV444P9LE, AV_PIX_FMT_YUV444P10LE,
+AV_PIX_FMT_YUV444P12LE, AV_PIX_FMT_YUV444P14LE,
+AV_PIX_FMT_RGB24,  AV_PIX_FMT_BGR24,
+AV_PIX_FMT_RGBA,   AV_PIX_FMT_BGRA,
+AV_PIX_FMT_ARGB,   AV_PIX_FMT_ABGR,
+AV_PIX_FMT_0RGB,   AV_PIX_FMT_0BGR,
+AV_PIX_FMT_RGB0,   AV_PIX_FMT_BGR0,
+AV_PIX_FMT_RGB48LE,  AV_PIX_FMT_BGR48LE,
+AV_PIX_FMT_RGBA64LE, AV_PIX_FMT_BGRA64LE,
+AV_PIX_FMT_GBRP,   AV_PIX_FMT_GBRAP,
+AV_PIX_FMT_GBRP9LE,
+AV_PIX_FMT_GBRP10LE, AV_PIX_FMT_GBRAP10LE,
+AV_PIX_FMT_GBRP12LE, AV_PIX_FMT_GBRAP12LE,
+AV_PIX_FMT_GBRP14LE,
+AV_PIX_FMT_GBRP16LE,  AV_PIX_FMT_GBRAP16LE
+};
+
+const char *usage =  "floatimg_cmp -pixel_format  -size  
-ref \n";
+
+int main(int argc, char **argv)
+{
+enum AVPixelFormat inFormat = AV_PIX_FMT_NONE;
+enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE;
+const AVPixFmtDescriptor *desc;
+uint8_t *ptr;
+uint32_t *in, *out;
+
+uint8_t *rgbIn[4]  = {NULL, NULL, NULL, NULL};
+uint8_t *rgbOut[4] = {NULL, NULL, NULL, NULL};
+int rgbStride[4];
+
+uint8_t *dst[4] = {NULL, NULL, NULL, NULL};
+int dstStride[4];
+
+int i, x, y, p, size, count;
+int res = -1;
+int w = -1;
+int h = -1;
+union av_intfloat32 v0, v1;
+
+double sum;
+float minimum, maximum, diff;
+
+struct SwsContext *sws = NULL;
+AVLFG rand;
+FILE *fp = NULL;
+
+for (i = 1; i < argc; i += 2) {
+if (argv[i][0] != '-' || i + 1 == argc)
+goto bad_option;
+if (!strcmp(argv[i], "-ref")) {
+fp = fopen(argv[i + 1], "rb");
+if (!fp) {
+fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
+goto end;
+}
+} else if (!strcmp(argv[i], "-size")) {
+res = av_parse_video_size(&w, &h, argv[i + 1]);
+if (res < 0) {
+fprintf(stderr, "invalid video size %s\n",  argv[i + 1]);
+goto end;
+}
+} else if (!strcmp(argv[i], "-pixel_format")) {
+inFormat = av_get_pix_fmt(argv[i + 1]);
+if (inFormat == AV_PIX_FMT_NONE) {
+fprintf(stde

[FFmpeg-devel] [PATCH] libavcodec/exr: fix incorrect translation of denorm mantissa

2020-09-14 Thread mindmark

From: Mark Reid 

Hi,
This fixes a very subtle error that's hard to notice until you unpremultiply an image.

This loop is supposed to stop at the first 1, but instead was stopping at the first 0.
The comment is correct though!

OpenEXR's implementation is very similar:
https://github.com/AcademySoftwareFoundation/openexr/blob/master/IlmBase/Half/toFloat.cpp#L85

Not all the exr tests needed to be fixed, because only some have denorm values.

---
 libavcodec/exr.c   | 2 +-
 tests/ref/fate/exr-rgba-multiscanline-half-b44 | 2 +-
 tests/ref/fate/exr-slice-raw   | 2 +-
 tests/ref/fate/exr-slice-rle   | 2 +-
 tests/ref/fate/exr-slice-zip1  | 2 +-
 tests/ref/fate/exr-slice-zip16 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 829d38143d..216d216785 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -201,7 +201,7 @@ static union av_intfloat32 exr_half2float(uint16_t hf)
 mantissa <<= 1;
 exp = HALF_FLOAT_MIN_BIASED_EXP_AS_SINGLE_FP_EXP;
 // check for leading 1 in denorm mantissa
-while ((mantissa & (1 << 10))) {
+while (!(mantissa & (1 << 10))) {
 // for every leading 0, decrement single precision exponent by 
1
 // and shift half-float mantissa value to the left
 mantissa <<= 1;
diff --git a/tests/ref/fate/exr-rgba-multiscanline-half-b44 
b/tests/ref/fate/exr-rgba-multiscanline-half-b44
index 964bf2e65e..24525b92a7 100644
--- a/tests/ref/fate/exr-rgba-multiscanline-half-b44
+++ b/tests/ref/fate/exr-rgba-multiscanline-half-b44
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 935x251
 #sar 0: 1/1
-0,  0,  0,1,  3754960, 0x4d48a1b2
+0,  0,  0,1,  3754960, 0x8d9af112
diff --git a/tests/ref/fate/exr-slice-raw b/tests/ref/fate/exr-slice-raw
index c7096e4d2a..1e7d3825ea 100644
--- a/tests/ref/fate/exr-slice-raw
+++ b/tests/ref/fate/exr-slice-raw
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 587x675
 #sar 0: 1/1
-0,  0,  0,1,  6339600, 0x4f2b496b
+0,  0,  0,1,  6339600, 0xda3e31df
diff --git a/tests/ref/fate/exr-slice-rle b/tests/ref/fate/exr-slice-rle
index c7096e4d2a..1e7d3825ea 100644
--- a/tests/ref/fate/exr-slice-rle
+++ b/tests/ref/fate/exr-slice-rle
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 587x675
 #sar 0: 1/1
-0,  0,  0,1,  6339600, 0x4f2b496b
+0,  0,  0,1,  6339600, 0xda3e31df
diff --git a/tests/ref/fate/exr-slice-zip1 b/tests/ref/fate/exr-slice-zip1
index c7096e4d2a..1e7d3825ea 100644
--- a/tests/ref/fate/exr-slice-zip1
+++ b/tests/ref/fate/exr-slice-zip1
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 587x675
 #sar 0: 1/1
-0,  0,  0,1,  6339600, 0x4f2b496b
+0,  0,  0,1,  6339600, 0xda3e31df
diff --git a/tests/ref/fate/exr-slice-zip16 b/tests/ref/fate/exr-slice-zip16
index c7096e4d2a..1e7d3825ea 100644
--- a/tests/ref/fate/exr-slice-zip16
+++ b/tests/ref/fate/exr-slice-zip16
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 587x675
 #sar 0: 1/1
-0,  0,  0,1,  6339600, 0x4f2b496b
+0,  0,  0,1,  6339600, 0xda3e31df
-- 
2.27.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] avfilter/vf_premultiply: add missing AV_PIX_FMT_YUVA444P12

2020-09-13 Thread mindmark

From: Mark Reid 

query_formats says it's supported, but it is missing from the switch statement,
leading to a segfault.

---
 libavfilter/vf_premultiply.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavfilter/vf_premultiply.c b/libavfilter/vf_premultiply.c
index 7e5b2aa97f..e051cadac0 100644
--- a/libavfilter/vf_premultiply.c
+++ b/libavfilter/vf_premultiply.c
@@ -546,6 +546,7 @@ static int filter_frame(AVFilterContext *ctx,
 case AV_PIX_FMT_YUV444P10:
 case AV_PIX_FMT_YUVA444P10:
 case AV_PIX_FMT_YUV444P12:
+case AV_PIX_FMT_YUVA444P12:
 case AV_PIX_FMT_YUV444P14:
 case AV_PIX_FMT_YUV444P16:
 case AV_PIX_FMT_YUVA444P16:
@@ -597,6 +598,7 @@ static int filter_frame(AVFilterContext *ctx,
 case AV_PIX_FMT_YUV444P10:
 case AV_PIX_FMT_YUVA444P10:
 case AV_PIX_FMT_YUV444P12:
+case AV_PIX_FMT_YUVA444P12:
 case AV_PIX_FMT_YUV444P14:
 case AV_PIX_FMT_YUV444P16:
 case AV_PIX_FMT_YUVA444P16:
-- 
2.27.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 0/2] libswcale/input: fix incorrect rgbf32 yuv conversions

2020-09-12 Thread mindmark

From: Mark Reid 

Hi,
I'm not sure how I didn't notice this before, but the f32 to yuv conversion
is slightly off. The following 2 patches fix it.

In doing this I found that one of the exr tests probably wasn't testing the
right layer, so I fixed that first.

Mark Reid (2):
  fate: use correct uint32 layer
  libswcale/input: fix incorrect rgbf32 yuv conversions

 libswscale/input.c   | 12 +---
 tests/fate/image.mak |  2 +-
 .../ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 |  2 +-
 tests/ref/fate/filter-pixfmts-scale  |  8 
 4 files changed, 11 insertions(+), 13 deletions(-)

-- 
2.27.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 2/2] libswcale/input: fix incorrect rgbf32 yuv conversions

2020-09-12 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c  | 12 +---
 tests/ref/fate/filter-pixfmts-scale |  8 
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 064ed5902f..67a85b0418 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -984,15 +984,14 @@ static av_always_inline void planar_rgbf32_to_uv(uint8_t 
*_dstU, uint8_t *_dstV,
 uint16_t *dstV   = (uint16_t *)_dstV;
 int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
 int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
-int bpc = 16;
-int shift = 14;
+
 for (i = 0; i < width; i++) {
 int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
 int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
 int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
 
-dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14);
-dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14);
+dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> 
RGB2YUV_SHIFT;
+dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> 
RGB2YUV_SHIFT;
 }
 }
 
@@ -1003,14 +1002,13 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t 
*_dst, const uint8_t *_s
 uint16_t *dst= (uint16_t *)_dst;
 
 int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
-int bpc = 16;
-int shift = 14;
+
 for (i = 0; i < width; i++) {
 int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
 int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
 int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
 
-dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> 
(RGB2YUV_SHIFT + shift - 14));
+dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> 
RGB2YUV_SHIFT;
 }
 }
 
diff --git a/tests/ref/fate/filter-pixfmts-scale 
b/tests/ref/fate/filter-pixfmts-scale
index d7020ad2c3..30e7cd5b06 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -25,8 +25,8 @@ gbrap12be   1d9b57766ba9c2192403f43967cb9af0
 gbrap12le   bb1ba1c157717db3dd612a76d38a018e
 gbrap16be   c72b935a6e57a8e1c37bff08c2db55b1
 gbrap16le   13eb0e62b1ac9c1c86c81521eaefab5f
-gbrapf32be  42e53d9edccbd9e09c4cd78780ba92f3
-gbrapf32le  eebf3973ef94c841f0a1ceb1ed61621d
+gbrapf32be  366b804d5697276e8c481c4bdf05a00b
+gbrapf32le  558a268e6d6b907449d1056afab78f29
 gbrpdc3387f925f972c61aae7eb23cdc19f0
 gbrp10be0277d4c3a8498d75e2783fb81379e481
 gbrp10lef3d70f8ab845c3c9b8f7452e4a6e285a
@@ -38,8 +38,8 @@ gbrp16be5fc826cfabebfc1442cb793c4b6303e2
 gbrp16le1b3e0b63d47a3e1b6b20931316883bf2
 gbrp9be d9c88968001e1452ff31fbc8d16b18a0
 gbrp9le 2ccfed0816bf6bd4bb3a5b7591d9603a
-gbrpf32be   4614d32e4417f80e0adcc1bdcf6cde42
-gbrpf32le   1366ee77e5559672260bbe51040e28b2
+gbrpf32be   f3d0cefdf11c861001880772d817aac8
+gbrpf32le   290468205c1c18a0667edfca45061aee
 gray221201cc7cfc4964eacd8b3e426fd276
 gray10be9452756d0b37f4f5c7cae7635e22d747
 gray10le37fd2e1ec6b66410212d39a342e864df
-- 
2.27.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/2] fate: use correct uint32 layer

2020-09-12 Thread mindmark

From: Mark Reid 

---
 tests/fate/image.mak   | 2 +-
 tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/fate/image.mak b/tests/fate/image.mak
index 756d01c667..69b4ea5431 100644
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak
@@ -221,7 +221,7 @@ FATE_EXR += fate-exr-rgb-scanline-pxr24-float-half-l2
 fate-exr-rgb-scanline-pxr24-float-half-l2: CMD = framecrc -layer 
"VRaySamplerInfo" -i $(TARGET_SAMPLES)/exr/rgb_scanline_pxr24_float_half.exr 
-pix_fmt gbrapf32le
 
 FATE_EXR += fate-exr-rgb-scanline-pxr24-half-uint32-13x9
-fate-exr-rgb-scanline-pxr24-half-uint32-13x9: CMD = framecrc -i 
$(TARGET_SAMPLES)/exr/rgb_scanline_pxr24_half_uint32_13x9.exr -pix_fmt rgb48le 
-vf scale
+fate-exr-rgb-scanline-pxr24-half-uint32-13x9: CMD = framecrc -layer 
"VRaySamplerInfo" -i 
$(TARGET_SAMPLES)/exr/rgb_scanline_pxr24_half_uint32_13x9.exr -pix_fmt rgb48le 
-vf scale
 
 FATE_EXR += fate-exr-rgb-scanline-zip-half-float-l1
 fate-exr-rgb-scanline-zip-half-float-l1: CMD = framecrc -i 
$(TARGET_SAMPLES)/exr/rgb_scanline_zip_half_float.exr -pix_fmt gbrpf32le
diff --git a/tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9 
b/tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9
index 523ed9c88b..2d209d8a63 100644
--- a/tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9
+++ b/tests/ref/fate/exr-rgb-scanline-pxr24-half-uint32-13x9
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 13x9
 #sar 0: 9/10
-0,  0,  0,1,  702, 0x68c1450d
+0,  0,  0,1,  702, 0x86132f10
-- 
2.27.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/1] avfilter/vf_premultiply: add support for gbrapf32 format

2020-09-09 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_premultiply.c | 118 ++-
 1 file changed, 115 insertions(+), 3 deletions(-)

diff --git a/libavfilter/vf_premultiply.c b/libavfilter/vf_premultiply.c
index 5d053b1f77..7e5b2aa97f 100644
--- a/libavfilter/vf_premultiply.c
+++ b/libavfilter/vf_premultiply.c
@@ -73,7 +73,7 @@ static int query_formats(AVFilterContext *ctx)
 AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV444P14,
 AV_PIX_FMT_YUV444P16,
 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
-AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, 
AV_PIX_FMT_GBRPF32,
 AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, 
AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
 AV_PIX_FMT_NONE
 };
@@ -82,7 +82,7 @@ static int query_formats(AVFilterContext *ctx)
 AV_PIX_FMT_YUVA444P,
 AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, 
AV_PIX_FMT_YUVA444P16,
 AV_PIX_FMT_GBRAP,
-AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, 
AV_PIX_FMT_GBRAPF32,
 AV_PIX_FMT_NONE
 };
 
@@ -218,6 +218,54 @@ static void premultiply16offset(const uint8_t *mmsrc, 
const uint8_t *aasrc,
 }
 }
 
+static void premultiplyf32(const uint8_t *mmsrc, const uint8_t *aasrc,
+  uint8_t *ddst,
+  ptrdiff_t mlinesize, ptrdiff_t alinesize,
+  ptrdiff_t dlinesize,
+  int w, int h,
+  int half, int shift, int offset)
+{
+const float *msrc = (const float *)mmsrc;
+const float *asrc = (const float *)aasrc;
+float *dst = (float *)ddst;
+int x, y;
+
+for (y = 0; y < h; y++) {
+for (x = 0; x < w; x++) {
+dst[x] = msrc[x] * asrc[x];
+}
+
+dst  += dlinesize / 4;
+msrc += mlinesize / 4;
+asrc += alinesize / 4;
+}
+}
+
+static void premultiplyf32offset(const uint8_t *mmsrc, const uint8_t *aasrc,
+uint8_t *ddst,
+ptrdiff_t mlinesize, ptrdiff_t alinesize,
+ptrdiff_t dlinesize,
+int w, int h,
+int half, int shift, int offset)
+{
+const float *msrc = (const float *)mmsrc;
+const float *asrc = (const float *)aasrc;
+float *dst = (float *)ddst;
+int x, y;
+
+float offsetf = offset / 65535.0f;
+
+for (y = 0; y < h; y++) {
+for (x = 0; x < w; x++) {
+dst[x] = ((msrc[x] - offsetf) * asrc[x]) + offsetf;
+}
+
+dst  += dlinesize / 4;
+msrc += mlinesize / 4;
+asrc += alinesize / 4;
+}
+}
+
 static void unpremultiply8(const uint8_t *msrc, const uint8_t *asrc,
uint8_t *dst,
ptrdiff_t mlinesize, ptrdiff_t alinesize,
@@ -365,6 +413,62 @@ static void unpremultiply16offset(const uint8_t *mmsrc, 
const uint8_t *aasrc,
 }
 }
 
+static void unpremultiplyf32(const uint8_t *mmsrc, const uint8_t *aasrc,
+uint8_t *ddst,
+ptrdiff_t mlinesize, ptrdiff_t alinesize,
+ptrdiff_t dlinesize,
+int w, int h,
+int half, int max, int offset)
+{
+const float *msrc = (const float *)mmsrc;
+const float *asrc = (const float *)aasrc;
+
+float *dst = (float *)ddst;
+int x, y;
+
+for (y = 0; y < h; y++) {
+for (x = 0; x < w; x++) {
+if (asrc[x] > 0.0f)
+dst[x] = msrc[x] / asrc[x];
+else
+dst[x] = msrc[x];
+}
+
+dst  += dlinesize / 4;
+msrc += mlinesize / 4;
+asrc += alinesize / 4;
+}
+}
+
+static void unpremultiplyf32offset(const uint8_t *mmsrc, const uint8_t *aasrc,
+uint8_t *ddst,
+ptrdiff_t mlinesize, ptrdiff_t alinesize,
+ptrdiff_t dlinesize,
+int w, int h,
+int half, int max, int offset)
+{
+const float *msrc = (const float *)mmsrc;
+const float *asrc = (const float *)aasrc;
+
+float *dst = (float *)ddst;
+int x, y;
+
+float offsetf = offset / 65535.0f;
+
+for (y = 0; y < h; y++) {
+for (x = 0; x < w; x++) {
+if (asrc[x] > 0.0f)
+dst[x] = (msrc[x] - offsetf) / asrc[x] + offsetf;
+else
+dst[x] = msrc[x];
+}
+
+dst  += dlinesize / 4;
+msrc += mlinesize / 4;
+asrc += alinesize / 4;
+}
+}
+
 static int premultiply_slice(AVFilterContext *ctx, void *arg, int jobnr, int 
nb_jobs)
 {
 PreMultiplyContext *s = ctx->pri

[FFmpeg-devel] [PATCH 1/1] avcodec/exr: add support data windows larger or outside display window

2020-09-07 Thread mindmark

From: Mark Reid 

Hi,
The following patch adds exr support for data windows that are larger or 
outside the display window. 
This adds support for both scanline and tiled formats.

Here are the added exr files for the fate tests. Could somebody upload them to 
fate for me?
https://www.dropbox.com/s/m0941dmjoejskp0/exr_datawindow_fate_files.zip

---
 libavcodec/exr.c  | 110 ++
 tests/fate/image.mak  |  31 +
 .../fate/exr-rgb-scanline-float-zip-dw-large  |   6 +
 .../fate/exr-rgb-scanline-half-piz-dw-large   |   6 +
 .../fate/exr-rgb-scanline-half-zip-dw-large   |   6 +
 .../fate/exr-rgb-scanline-half-zip-dw-outside |   6 +
 .../fate/exr-rgb-scanline-uint32-piz-dw-large |   6 +
 tests/ref/fate/exr-rgb-tile-half-piz-dw-large |   6 +
 tests/ref/fate/exr-rgb-tile-half-zip  |   6 +
 .../ref/fate/exr-rgb-tile-half-zip-dw-outside |   6 +
 .../ref/fate/exr-rgb-tile-uint32-piz-dw-large |   6 +
 tests/ref/fate/exr-ya-scanline-zip-half-12x8  |   6 +
 12 files changed, 155 insertions(+), 46 deletions(-)
 create mode 100644 tests/ref/fate/exr-rgb-scanline-float-zip-dw-large
 create mode 100644 tests/ref/fate/exr-rgb-scanline-half-piz-dw-large
 create mode 100644 tests/ref/fate/exr-rgb-scanline-half-zip-dw-large
 create mode 100644 tests/ref/fate/exr-rgb-scanline-half-zip-dw-outside
 create mode 100644 tests/ref/fate/exr-rgb-scanline-uint32-piz-dw-large
 create mode 100644 tests/ref/fate/exr-rgb-tile-half-piz-dw-large
 create mode 100644 tests/ref/fate/exr-rgb-tile-half-zip
 create mode 100644 tests/ref/fate/exr-rgb-tile-half-zip-dw-outside
 create mode 100644 tests/ref/fate/exr-rgb-tile-uint32-piz-dw-large
 create mode 100644 tests/ref/fate/exr-ya-scanline-zip-half-12x8

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 68d5befa40..d5f12cb22a 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -134,8 +134,8 @@ typedef struct EXRContext {
 const AVPixFmtDescriptor *desc;
 
 int w, h;
-uint32_t xmax, xmin;
-uint32_t ymax, ymin;
+int32_t xmax, xmin;
+int32_t ymax, ymin;
 uint32_t xdelta, ydelta;
 
 int scan_lines_per_block;
@@ -995,12 +995,13 @@ static int decode_block(AVCodecContext *avctx, void 
*tdata,
 uint64_t line_offset, uncompressed_size;
 uint8_t *ptr;
 uint32_t data_size;
-uint64_t line, col = 0;
+int line, col = 0;
 uint64_t tile_x, tile_y, tile_level_x, tile_level_y;
 const uint8_t *src;
 int step = s->desc->flags & AV_PIX_FMT_FLAG_FLOAT ? 4 : 2 * 
s->desc->nb_components;
-int axmax = (avctx->width - (s->xmax + 1)) * step; /* nb pixel to add at 
the right of the datawindow */
-int bxmin = s->xmin * step; /* nb pixel to add at the left of the 
datawindow */
+int bxmin, axmax, window_xoffset = 0;
+int window_xmin, window_xmax, window_ymin, window_ymax;
+int data_xoffset, data_yoffset, data_window_offset, xsize, ysize;
 int i, x, buf_size = s->buf_size;
 int c, rgb_channel_count;
 float one_gamma = 1.0f / s->gamma;
@@ -1029,28 +1030,16 @@ static int decode_block(AVCodecContext *avctx, void 
*tdata,
 return AVERROR_PATCHWELCOME;
 }
 
-if (s->xmin || s->ymin) {
-avpriv_report_missing_feature(s->avctx, "Tiles with xmin/ymin");
-return AVERROR_PATCHWELCOME;
-}
-
-line = s->tile_attr.ySize * tile_y;
+line = s->ymin + s->tile_attr.ySize * tile_y;
 col = s->tile_attr.xSize * tile_x;
 
 if (line < s->ymin || line > s->ymax ||
-col  < s->xmin || col  > s->xmax)
+s->xmin + col  < s->xmin ||  s->xmin + col  > s->xmax)
 return AVERROR_INVALIDDATA;
 
 td->ysize = FFMIN(s->tile_attr.ySize, s->ydelta - tile_y * 
s->tile_attr.ySize);
 td->xsize = FFMIN(s->tile_attr.xSize, s->xdelta - tile_x * 
s->tile_attr.xSize);
 
-if (col) { /* not the first tile of the line */
-bxmin = 0; /* doesn't add pixel at the left of the datawindow */
-}
-
-if ((col + td->xsize) != s->xdelta)/* not the last tile of the line */
-axmax = 0; /* doesn't add pixel at the right of the datawindow */
-
 td->channel_line_size = td->xsize * s->current_channel_offset;/* 
uncompress size of one line */
 uncompressed_size = td->channel_line_size * (uint64_t)td->ysize;/* 
uncompress size of the block */
 } else {
@@ -1081,6 +1070,33 @@ static int decode_block(AVCodecContext *avctx, void 
*tdata,
 }
 }
 
+window_xmin = FFMIN(avctx->width, FFMAX(0, s->xmin + col));
+window_xmax = FFMIN(avctx->width, FFMAX(0, s->xmin + col + td->xsize));
+window_ymin = FFMIN(avctx->height, FFMAX(0, line ));
+window_ymax = FFMIN(avctx->height, FFMAX(0, line + td->ysize));
+xsize = window_xmax - window_xmin;
+ysize = window_ymax - window_ymin;
+
+/* tile or scanline not visible skip decoding */
+if (xsize <= 0 || ysize <= 0)
+return 0;
+
+/* i

[FFmpeg-devel] [PATCH] avfilter/vf_lut3d: support remaping negative values in the prelut

2020-09-05 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_lut3d.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 6730a424ce..988f6c8b55 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -878,18 +878,16 @@ static int parse_cinespace(AVFilterContext *ctx, FILE *f)
 
 prelut_sizes[i] = npoints;
 in_min[i] = FLT_MAX;
-in_max[i] = FLT_MIN;
+in_max[i] = -FLT_MAX;
 out_min[i] = FLT_MAX;
-out_max[i] = FLT_MIN;
-
-last = FLT_MIN;
+out_max[i] = -FLT_MAX;
 
 for (int j = 0; j < npoints; j++) {
 NEXT_FLOAT_OR_GOTO(v, end)
 in_min[i] = FFMIN(in_min[i], v);
 in_max[i] = FFMAX(in_max[i], v);
 in_prelut[i][j] = v;
-if (v < last) {
+if (j > 0 && v < last) {
 av_log(ctx, AV_LOG_ERROR, "Invalid file, non 
increasing prelut.\n");
 ret = AVERROR(ENOMEM);
 goto end;
-- 
2.27.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2] avfilter/vf_lut3d: prelut support for 3d cinespace luts

2020-05-23 Thread mindmark

From: Mark Reid 

changes since v1:
* cleaned up code style
* slightly reworked apply_lut functions to feel more consistent with code

---
 libavfilter/vf_lut3d.c | 372 +++--
 1 file changed, 317 insertions(+), 55 deletions(-)

diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 482e2394a7..e5d9fcc068 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -59,6 +59,15 @@ struct rgbvec {
 /* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT
  * of 512x512 (64x64x64) */
 #define MAX_LEVEL 256
+/* NOTE(review): presumably the number of entries in each prelut table -- confirm against the parser */
+#define PRELUT_SIZE 65536
+
+/**
+ * Per-channel 1D lookup tables ("prelut") sampled by the prelut
+ * interpolation helper.
+ */
+typedef struct Lut3DPreLut {
+    int size;        /* entries per table; a value <= 0 means no prelut is applied */
+    float min[3];    /* per-channel input value subtracted before scaling */
+    float max[3];    /* NOTE(review): presumably the per-channel upper input bound -- confirm */
+    float scale[3];  /* per-channel factor turning (input - min) into a table position */
+    float *lut[3];   /* per-channel tables; index 0 = r, 1 = g, 2 = b */
+} Lut3DPreLut;

 typedef struct LUT3DContext {
 const AVClass *class;
@@ -71,6 +80,7 @@ typedef struct LUT3DContext {
 struct rgbvec *lut;
 int lutsize;
 int lutsize2;
+Lut3DPreLut prelut;
 #if CONFIG_HALDCLUT_FILTER
 uint8_t clut_rgba_map[4];
 int clut_step;
@@ -234,11 +244,39 @@ static inline struct rgbvec interp_tetrahedral(const 
LUT3DContext *lut3d,
 return c;
 }

+/**
+ * Sample one channel of the prelut with linear interpolation.
+ *
+ * The sample is turned into a table position using the channel's min and
+ * scale, clipped to [0, size - 1], and the two neighbouring table entries
+ * are blended by the distance from the lower one.
+ *
+ * @param prelut prelut tables and per-channel mapping
+ * @param idx    channel index into min[], scale[] and lut[]
+ * @param s      input sample
+ * @return the interpolated table value
+ */
+static inline float prelut_interp_1d_linear(const Lut3DPreLut *prelut,
+                                            int idx, const float s)
+{
+    const float *table = prelut->lut[idx];
+    const int    last  = prelut->size - 1;
+    const float  pos   = av_clipf((s - prelut->min[idx]) * prelut->scale[idx],
+                                  0.0f, last);
+    const int    lo    = PREV(pos);
+    /* upper neighbour is capped at the last entry so pos == last stays in range */
+    const int    hi    = FFMIN((int)(pos) + 1, last);
+
+    return lerpf(table[lo], table[hi], pos - (float)lo);
+}
+
+/**
+ * Run an RGB triplet through the prelut.
+ *
+ * @param prelut prelut to apply; when its size is <= 0 the input is
+ *               returned unchanged
+ * @param s      input colour
+ * @return colour with each channel replaced by its interpolated prelut value
+ */
+static inline struct rgbvec apply_prelut(const Lut3DPreLut *prelut,
+                                         const struct rgbvec *s)
+{
+    /* declared before the first statement: FFmpeg code does not mix
+     * declarations and statements */
+    struct rgbvec c;
+
+    if (prelut->size <= 0)
+        return *s;
+
+    c.r = prelut_interp_1d_linear(prelut, 0, s->r);
+    c.g = prelut_interp_1d_linear(prelut, 1, s->g);
+    c.b = prelut_interp_1d_linear(prelut, 2, s->b);
+    return c;
+}
+
 #define DEFINE_INTERP_FUNC_PLANAR(name, nbits, depth)  
\
 static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, 
int jobnr, int nb_jobs) \
 {  
\
 int x, y;  
\
 const LUT3DContext *lut3d = ctx->priv; 
\
+const Lut3DPreLut *prelut = &lut3d->prelut;
\
 const ThreadData *td = arg;
\
 const AVFrame *in  = td->in;   
\
 const AVFrame *out = td->out;  
\
@@ -253,9 +291,11 @@ static int 
interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, i
 const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1];  
\
 const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2];  
\
 const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3];  
\
-const float scale_r = (lut3d->scale.r / ((1scale.g / ((1scale.b / ((1lutsize - 1;  
\
+const float scale_f = 1.0f / ((1scale.g * lut_max;
\
+const float scale_b = lut3d->scale.b * lut_max;
\

\
 for (y = slice_start; y < slice_end; y++) {
\
 uint##nbits##_t *dstg = (uint##nbits##_t *)grow;   
\
@@ -267,9 +307,13 @@ static int 
interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, i
 const uint##nbits##_t *srcr = (const uint##nbits##_t *)srcrrow;
\
 const uint##nbits##_t *srca = (const uint##nbits##_t *)srcarow;
\
 for (x = 0; x < in->width; x++) {  
\
-const struct rgbvec scaled_rgb = {srcr[x] * scale_r,

[FFmpeg-devel] [PATCH 2/2] avfilter/vf_lut3d: prelut support for 3d cinespace luts

2020-05-18 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_lut3d.c | 367 +++--
 1 file changed, 312 insertions(+), 55 deletions(-)

diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 482e2394a7..4067c4a60a 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -59,6 +59,15 @@ struct rgbvec {
 /* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT
  * of 512x512 (64x64x64) */
 #define MAX_LEVEL 256
+/* NOTE(review): presumably the number of entries in each prelut table -- confirm against the parser */
+#define PRELUT_SIZE 65536
+
+/**
+ * Per-channel 1D lookup tables ("prelut") sampled by the prelut
+ * interpolation helper.
+ */
+typedef struct Lut3DPreLut {
+    int size;        /* entries per table; a value <= 0 means no prelut is applied */
+    float min[3];    /* per-channel input value subtracted before scaling */
+    float max[3];    /* NOTE(review): presumably the per-channel upper input bound -- confirm */
+    float scale[3];  /* per-channel factor turning (input - min) into a table position */
+    float *lut[3];   /* per-channel tables; index 0 = r, 1 = g, 2 = b */
+} Lut3DPreLut;
 
 typedef struct LUT3DContext {
 const AVClass *class;
@@ -71,6 +80,7 @@ typedef struct LUT3DContext {
 struct rgbvec *lut;
 int lutsize;
 int lutsize2;
+Lut3DPreLut prelut;
 #if CONFIG_HALDCLUT_FILTER
 uint8_t clut_rgba_map[4];
 int clut_step;
@@ -234,6 +244,31 @@ static inline struct rgbvec interp_tetrahedral(const 
LUT3DContext *lut3d,
 return c;
 }
 
+/**
+ * Sample one channel of the context's prelut with linear interpolation.
+ *
+ * @param lut3d filter context holding the prelut
+ * @param x     input sample
+ * @param c     channel index into the prelut's min[], scale[] and lut[]
+ * @return the interpolated table value
+ */
+static inline float apply_prelut_channel(const LUT3DContext *lut3d, float x, int c)
+{
+    /* all declarations precede the first statement: FFmpeg code does not
+     * mix declarations and statements */
+    const Lut3DPreLut *prelut = &lut3d->prelut;
+    const int lut_max = prelut->size - 1;
+    int prev, next;
+    float a, b;
+
+    /* map the sample to a table position and keep it inside the table */
+    x = (x - prelut->min[c]) * prelut->scale[c];
+    x = av_clipf(x, 0.0f, lut_max);
+
+    /* PREV() is evaluated once and reused for the index and the blend weight */
+    prev = PREV(x);
+    next = FFMIN((int)(x) + 1, lut_max);
+    a    = prelut->lut[c][prev];
+    b    = prelut->lut[c][next];
+
+    return lerpf(a, b, x - (float)prev);
+}
+
+/**
+ * Run an RGB triplet through the context's prelut.
+ *
+ * @param lut3d filter context; when its prelut size is <= 0 the input is
+ *              returned unchanged
+ * @param s     input colour
+ * @return colour with each channel replaced by its interpolated prelut value
+ */
+static inline struct rgbvec apply_prelut(const LUT3DContext *lut3d,
+                                         const struct rgbvec *s)
+{
+    /* declared before the first statement: FFmpeg code does not mix
+     * declarations and statements */
+    struct rgbvec c;
+
+    if (lut3d->prelut.size <= 0)
+        return *s;
+
+    c.r = apply_prelut_channel(lut3d, s->r, 0);
+    c.g = apply_prelut_channel(lut3d, s->g, 1);
+    c.b = apply_prelut_channel(lut3d, s->b, 2);
+    return c;
+}
+
 #define DEFINE_INTERP_FUNC_PLANAR(name, nbits, depth)  
\
 static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, 
int jobnr, int nb_jobs) \
 {  
\
@@ -253,9 +288,11 @@ static int 
interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, i
 const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1];  
\
 const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2];  
\
 const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3];  
\
-const float scale_r = (lut3d->scale.r / ((1scale.g / ((1scale.b / ((1lutsize - 1;  
\
+const float scale_f = 1.0f / ((1scale.g * lut_max;
\
+const float scale_b = lut3d->scale.b * lut_max;
\

\
 for (y = slice_start; y < slice_end; y++) {
\
 uint##nbits##_t *dstg = (uint##nbits##_t *)grow;   
\
@@ -267,9 +304,13 @@ static int 
interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, i
 const uint##nbits##_t *srcr = (const uint##nbits##_t *)srcrrow;
\
 const uint##nbits##_t *srca = (const uint##nbits##_t *)srcarow;
\
 for (x = 0; x < in->width; x++) {  
\
-const struct rgbvec scaled_rgb = {srcr[x] * scale_r,   
\
-  srcg[x] * scale_g,   
\
-  srcb[x] * scale_b};  
\
+const struct rgbvec rgb = {srcr[x] * scale_f,  
\
+   srcg[x] * scale_f,  
\
+   srcb[x] * scale_f}; 
\
+const struct rgbvec prelut_rgb = apply_prelut(lut3d, &rgb);
\
+const struct rgbvec scaled_rgb = {av_clipf(prelut_rgb.r * scale_r, 
0, lut_max),\
+  av_clipf(prelut_rgb.g * scale_g, 
0, lut_max),\
+  av_clipf(prelut_rgb.b *

[FFmpeg-devel] [PATCH 1/2] avfilter/vf_lut3d: initial float pixel format support

2020-05-18 Thread mindmark

From: Mark Reid 

---
 libavfilter/vf_lut3d.c | 207 +++--
 1 file changed, 201 insertions(+), 6 deletions(-)

diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index fda85b16b4..482e2394a7 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -24,9 +24,12 @@
  * 3D Lookup table filter
  */
 
+#include "float.h"
+
 #include "libavutil/opt.h"
 #include "libavutil/file.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat.h"
 #include "libavutil/avassert.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/avstring.h"
@@ -73,6 +76,7 @@ typedef struct LUT3DContext {
 int clut_step;
 int clut_bits;
 int clut_planar;
+int clut_float;
 int clut_width;
 FFFrameSync fs;
 #endif
@@ -91,6 +95,30 @@ typedef struct ThreadData {
 { "tetrahedral", "interpolate values using a tetrahedron", 
   0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TETRAHEDRAL}, INT_MIN, INT_MAX, 
FLAGS, "interp_mode" }, \
 { NULL }
 
+/* IEEE 754 binary32 bit fields: 1 sign bit, 8 exponent bits, 23 mantissa bits */
+#define EXPONENT_MASK 0x7F800000
+#define MANTISSA_MASK 0x007FFFFF
+#define SIGN_MASK     0x80000000
+
+/**
+ * Replace non-finite floats with finite substitutes.
+ *
+ * @param f value to sanitize
+ * @return 0.0f for NaN, -FLT_MAX for -INF, FLT_MAX for +INF,
+ *         and f itself for every finite value
+ */
+static inline float sanitizef(float f)
+{
+    union av_intfloat32 t;
+    t.f = f;
+
+    /* an all-ones exponent marks NaN (mantissa != 0) or infinity (mantissa == 0) */
+    if ((t.i & EXPONENT_MASK) == EXPONENT_MASK) {
+        if ((t.i & MANTISSA_MASK) != 0) {
+            // NAN
+            return 0.0f;
+        } else if (t.i & SIGN_MASK) {
+            // -INF: FLT_MIN is the smallest positive normal float, so the
+            // most negative finite value is -FLT_MAX
+            return -FLT_MAX;
+        } else {
+            // +INF
+            return FLT_MAX;
+        }
+    }
+    return f;
+}
+
 static inline float lerpf(float v0, float v1, float f)
 {
 return v0 + (v1 - v0) * f;
@@ -285,6 +313,66 @@ DEFINE_INTERP_FUNC_PLANAR(nearest, 16, 16)
 DEFINE_INTERP_FUNC_PLANAR(trilinear,   16, 16)
 DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 16)
 
+#define DEFINE_INTERP_FUNC_PLANAR_FLOAT(name, depth)   
\
+static int interp_##name##_pf##depth(AVFilterContext *ctx, void *arg, int 
jobnr, int nb_jobs)  \
+{  
\
+int x, y;  
\
+const LUT3DContext *lut3d = ctx->priv; 
\
+const ThreadData *td = arg;
\
+const AVFrame *in  = td->in;   
\
+const AVFrame *out = td->out;  
\
+const int direct = out == in;  
\
+const int slice_start = (in->height *  jobnr   ) / nb_jobs;
\
+const int slice_end   = (in->height * (jobnr+1)) / nb_jobs;
\
+uint8_t *grow = out->data[0] + slice_start * out->linesize[0]; 
\
+uint8_t *brow = out->data[1] + slice_start * out->linesize[1]; 
\
+uint8_t *rrow = out->data[2] + slice_start * out->linesize[2]; 
\
+uint8_t *arow = out->data[3] + slice_start * out->linesize[3]; 
\
+const uint8_t *srcgrow = in->data[0] + slice_start * in->linesize[0];  
\
+const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1];  
\
+const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2];  
\
+const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3];  
\
+const float lutsize = lut3d->lutsize - 1;  
\
+const float scale_r = lut3d->scale.r * lutsize;
\
+const float scale_g = lut3d->scale.g * lutsize;
\
+const float scale_b = lut3d->scale.b * lutsize;
\
+   
\
+for (y = slice_start; y < slice_end; y++) {
\
+float *dstg = (float *)grow;   
\
+float *dstb = (float *)brow;   
\
+float *dstr = (float *)rrow;   
\
+float *dsta = (float *)arow;   
\
+const float *srcg = (const float *)srcgrow;

[FFmpeg-devel] [PATCH] libswscale: fix for floating point formats, require full chroma

2020-05-10 Thread mindmark

From: Mark Reid 

Upon more floating-point testing, it looks like I missed adding this bit.

---
 libswscale/utils.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index 15c0a19afa..111062e915 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1405,6 +1405,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter 
*srcFilter,
 srcFormat != AV_PIX_FMT_GBRP14BE  && srcFormat != AV_PIX_FMT_GBRP14LE 
&&
 srcFormat != AV_PIX_FMT_GBRP16BE  && srcFormat != AV_PIX_FMT_GBRP16LE 
&&
 srcFormat != AV_PIX_FMT_GBRAP16BE  && srcFormat != 
AV_PIX_FMT_GBRAP16LE &&
+srcFormat != AV_PIX_FMT_GBRPF32BE  && srcFormat != 
AV_PIX_FMT_GBRPF32LE &&
+srcFormat != AV_PIX_FMT_GBRAPF32BE && srcFormat != 
AV_PIX_FMT_GBRAPF32LE &&
 ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) ||
  (flags & SWS_FAST_BILINEAR)))
 c->chrSrcHSubSample = 1;
--
2.25.0
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2] avcodec/exr: output float pixels in float pixel format

2020-05-09 Thread mindmark

From: Mark Reid 

changes since v1
- default behavior, no longer hidden behind decoder parameter
- updated tests to reflect change

---
 libavcodec/exr.c  | 244 +-
 tests/fate/image.mak  | 120 -
 tests/ref/fate/exr-rgb-b44a-half-negative-4x4 |   2 +-
 .../exr-rgb-scanline-b44-half-float-12x8-l1   |   2 +-
 .../exr-rgb-scanline-b44-half-float-12x8-l2   |   2 +-
 tests/ref/fate/exr-rgb-scanline-float-b44 |   2 +-
 .../ref/fate/exr-rgb-scanline-float-piz-48x32 |   2 +-
 tests/ref/fate/exr-rgb-scanline-half-b44-12x8 |   2 +-
 tests/ref/fate/exr-rgb-scanline-half-b44-13x9 |   2 +-
 tests/ref/fate/exr-rgb-scanline-half-piz-bw   |   2 +-
 .../ref/fate/exr-rgb-scanline-half-piz-color  |   2 +-
 .../ref/fate/exr-rgb-scanline-half-piz-dw-t01 |   2 +-
 .../ref/fate/exr-rgb-scanline-half-piz-dw-t08 |   2 +-
 .../fate/exr-rgb-scanline-none-negative-red   |   2 +-
 .../fate/exr-rgb-scanline-pxr24-float-12x8|   2 +-
 .../fate/exr-rgb-scanline-pxr24-float-half-l1 |   2 +-
 .../fate/exr-rgb-scanline-pxr24-float-half-l2 |   2 +-
 .../fate/exr-rgb-scanline-pxr24-half-float-l1 |   2 +-
 .../fate/exr-rgb-scanline-pxr24-half-float-l2 |   2 +-
 .../exr-rgb-scanline-pxr24-half-uint32-13x9   |   2 +-
 .../fate/exr-rgb-scanline-raw-half-float-l1   |   2 +-
 .../fate/exr-rgb-scanline-raw-half-float-l2   |   2 +-
 .../fate/exr-rgb-scanline-rle-half-float-l1   |   2 +-
 .../fate/exr-rgb-scanline-rle-half-float-l2   |   2 +-
 .../fate/exr-rgb-scanline-zip-half-float-l1   |   2 +-
 .../fate/exr-rgb-scanline-zip-half-float-l2   |   2 +-
 .../fate/exr-rgb-scanline-zip1-half-float-l1  |   2 +-
 ...b-scanline-zip1-half-float-l1-zero-offsets |   2 +-
 .../fate/exr-rgb-scanline-zip1-half-float-l2  |   2 +-
 tests/ref/fate/exr-rgb-tile-float-raw-12x8|   2 +-
 tests/ref/fate/exr-rgb-tile-float-raw-150x130 |   2 +-
 .../fate/exr-rgb-tile-half-float-b44-12x8-l1  |   2 +-
 .../fate/exr-rgb-tile-half-float-b44-12x8-l2  |   2 +-
 tests/ref/fate/exr-rgb-tile-half-raw-12x8 |   2 +-
 .../ref/fate/exr-rgb-tile-pxr24-float-half-l1 |   2 +-
 .../ref/fate/exr-rgb-tile-pxr24-float-half-l2 |   2 +-
 .../ref/fate/exr-rgb-tile-pxr24-half-float-l1 |   2 +-
 .../ref/fate/exr-rgb-tile-pxr24-half-float-l2 |   2 +-
 tests/ref/fate/exr-rgb-tile-raw-half-float-l1 |   2 +-
 tests/ref/fate/exr-rgb-tile-raw-half-float-l2 |   2 +-
 tests/ref/fate/exr-rgb-tile-rle-half-float-l1 |   2 +-
 tests/ref/fate/exr-rgb-tile-rle-half-float-l2 |   2 +-
 tests/ref/fate/exr-rgb-tile-zip-half-float-l1 |   2 +-
 tests/ref/fate/exr-rgb-tile-zip-half-float-l2 |   2 +-
 .../ref/fate/exr-rgb-tile-zip1-half-float-l1  |   2 +-
 .../ref/fate/exr-rgb-tile-zip1-half-float-l2  |   2 +-
 .../ref/fate/exr-rgba-multiscanline-half-b44  |   2 +-
 .../exr-rgba-scanline-float-half-b44-12x8-l1  |   2 +-
 .../exr-rgba-scanline-float-half-b44-12x8-l2  |   2 +-
 .../exr-rgba-scanline-float-half-b44-13x9-l1  |   2 +-
 .../exr-rgba-scanline-float-half-b44-13x9-l2  |   2 +-
 .../exr-rgba-scanline-float-half-b44a-12x8-l1 |   2 +-
 .../exr-rgba-scanline-float-half-b44a-12x8-l2 |   2 +-
 .../exr-rgba-scanline-float-half-b44a-13x9-l1 |   2 +-
 .../exr-rgba-scanline-float-half-b44a-13x9-l2 |   2 +-
 tests/ref/fate/exr-rgba-zip16-16x32-flag4 |   2 +-
 tests/ref/fate/exr-slice-pxr24|   2 +-
 tests/ref/fate/exr-slice-raw  |   2 +-
 tests/ref/fate/exr-slice-rle  |   2 +-
 tests/ref/fate/exr-slice-zip1 |   2 +-
 tests/ref/fate/exr-slice-zip16|   2 +-
 tests/ref/fate/exr-y-scanline-zip-half-12x8   |   2 +-
 tests/ref/fate/exr-y-tile-zip-half-12x8   |   2 +-
 63 files changed, 246 insertions(+), 240 deletions(-)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 73419eadb1..68d5befa40 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -30,7 +30,6 @@
  * For more information on the OpenEXR format, visit:
  *  http://openexr.com/
  *
- * exr_flt2uint() and exr_halflt2uint() is credited to Reimar Döffinger.
  * exr_half2float() is credited to Aaftab Munshi, Dan Ginsburg, Dave Shreiner.
  */

@@ -160,7 +159,7 @@ typedef struct EXRContext {

 enum AVColorTransferCharacteristic apply_trc_type;
 float gamma;
-uint16_t gamma_table[65536];
+union av_intfloat32 gamma_table[65536];
 } EXRContext;

 /* -15 stored using a single precision bias of 127 */
@@ -225,47 +224,6 @@ static union av_intfloat32 exr_half2float(uint16_t hf)
 return f;
 }

-
-/**
- * Convert from 32-bit float as uint32_t to uint16_t.
- *
- * @param v 32-bit float
- *
- * @return normalized 16-bit unsigned int
- */
-static inline uint16_t exr_flt2uint(int32_t v)
-{
-int32_t exp = v >> 23;
-// "HACK": negative values result in exp<  0, so clipping them to 0
-// is also handled by this condition, avoids explicit check for sign bit.
-if (exp <= 127 + 7 - 24) // we would shift out all bits anyway
-return 0;
-if (exp >= 127)
-ret

[FFmpeg-devel] [PATCH v2 2/2] libswscale: add output support for AV_PIX_FMT_GBRAPF32

2020-05-03 Thread mindmark

From: Mark Reid 

---
 libswscale/output.c  | 82 
 libswscale/slice.c   | 28 
 libswscale/swscale.c |  5 ++
 libswscale/swscale_internal.h| 36 +++
 libswscale/swscale_unscaled.c| 33 ++
 libswscale/utils.c   |  8 +--
 tests/ref/fate/filter-pixdesc-gbrapf32be |  1 +
 tests/ref/fate/filter-pixdesc-gbrapf32le |  1 +
 tests/ref/fate/filter-pixdesc-gbrpf32be  |  1 +
 tests/ref/fate/filter-pixdesc-gbrpf32le  |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 20 files changed, 221 insertions(+), 15 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32le

diff --git a/libswscale/output.c b/libswscale/output.c
index 68f43ffba3..e864e515d0 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2312,6 +2312,82 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 
+/**
+ * Vertically filter the luma, chroma and optional alpha source lines,
+ * convert the result to RGB with the context's yuv2rgb coefficients and
+ * store it as 32-bit floats in planar destination buffers.
+ *
+ * The source line pointers are reinterpreted as int32_t lines.
+ * dest[0], dest[1] and dest[2] receive G, B and R; dest[3] receives alpha
+ * when the destination format has the alpha flag and alpSrcx is non-NULL.
+ * The y argument is not used by this function.
+ */
+static void
+yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+const int16_t **lumSrcx, int lumFilterSize,
+const int16_t *chrFilter, const int16_t **chrUSrcx,
+const int16_t **chrVSrcx, int chrFilterSize,
+const int16_t **alpSrcx, uint8_t **dest,
+int dstW, int y)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+int i;
+int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx;
+uint32_t **dest32 = (uint32_t**)dest;
+const int32_t **lumSrc  = (const int32_t**)lumSrcx;
+const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+/* scales a 16-bit integer sample to float, 65535 -> 1.0f */
+static const float float_mult = 1.0f / 65535.0f;
+
+for (i = 0; i < dstW; i++) {
+int j;
+/* NOTE(review): -0x4000 here and 0x1 below look small next to the >> 14 and
+ * the 30-bit clip; possibly truncated in this archived copy -- confirm
+ * against the applied commit */
+int Y = -0x4000;
+int U = -(128 << 23);
+int V = -(128 << 23);
+int R, G, B, A;
+
+/* accumulate the filter taps; the products are formed in unsigned arithmetic */
+for (j = 0; j < lumFilterSize; j++)
+Y += lumSrc[j][i] * (unsigned)lumFilter[j];
+
+for (j = 0; j < chrFilterSize; j++) {
+U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+}
+
+Y >>= 14;
+Y += 0x1;
+U >>= 14;
+V >>= 14;
+
+if (hasAlpha) {
+A = -0x4000;
+
+for (j = 0; j < lumFilterSize; j++)
+A += alpSrc[j][i] * (unsigned)lumFilter[j];
+
+A >>= 1;
+A += 0x20002000;
+}
+
+Y -= c->yuv2rgb_y_offset;
+Y *= c->yuv2rgb_y_coeff;
+Y += 1 << 13;
+R = V * c->yuv2rgb_v2r_coeff;
+G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+B =U * c->yuv2rgb_u2b_coeff;
+
+/* clip to 30 bits; the >> 14 below leaves a 16-bit value for float_mult */
+R = av_clip_uintp2(Y + R, 30);
+G = av_clip_uintp2(Y + G, 30);
+B = av_clip_uintp2(Y + B, 30);
+
+dest32[0][i] = av_float2int(float_mult * (float)(G >> 14));
+dest32[1][i] = av_float2int(float_mult * (float)(B >> 14));
+dest32[2][i] = av_float2int(float_mult * (float)(R >> 14));
+if (hasAlpha)
+dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 
30) >> 14));
+}
+/* second pass: byte-swap every stored value when the destination format's
+ * endianness differs from HAVE_BIGENDIAN */
+if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+for (i = 0; i < dstW; i++) {
+dest32[0][i] = av_bswap32(dest32[0][i]);
+dest32[1][i] = av_bswap32(dest32[1][i]);
+dest32[2][i] = av_bswap32(dest32[2][i]);
+if (hasAlpha)
+dest32[3][i] = av_bswap32(dest32[3][i]);
+}
+}
+}
+
 static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
 const int16_t *ubuf[2], const int16_t *vbuf[2],
@@ -2716,6 +2792,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 case AV_PIX_FMT_GBRAP16LE:
 *yuv2anyX = yuv2gbrp16_full_X_c;
 break;
+case AV_PIX_FMT_GBRPF32BE:
+case AV_PIX_FMT_GBRPF32LE:
+case AV_PIX_FMT_GBRAPF32BE:
+case AV_PIX_FMT_GBRAPF32LE:
+*yuv2anyX = yuv2gbrpf32_full_X_c;
+break;
 }
 if (!*yuv2packedX && !*yuv2anyX)
 goto YUV_PACKED;
diff --git a/libswscale/slice.c b/libswscale/slice.c
index db4fa874ff..7849b70f4d 100644

[FFmpeg-devel] [PATCH v2 1/2] libswscale: add input support AV_PIX_FMT_GBRAPF32

2020-05-03 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c | 91 ++
 libswscale/utils.c |  4 ++
 2 files changed, 95 insertions(+)

diff --git a/libswscale/input.c b/libswscale/input.c
index 099661cb6d..e74cf04133 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -960,6 +960,59 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t 
*_dstU, uint8_t *_dstV,
 }
 #undef rdpx
 
+#define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): 
av_int2float(AV_RL32(src)))
+
+/**
+ * Convert the alpha plane of one planar float RGB line to 16-bit samples.
+ *
+ * Each float is read with rdpx() (byte order selected by is_be), scaled by
+ * 65535, rounded and clipped to the uint16_t range.
+ *
+ * @param _dst    output line, written as uint16_t
+ * @param _src    source planes; only _src[3] is read
+ * @param width   number of samples to convert
+ * @param is_be   non-zero when the source floats are big-endian
+ * @param rgb2yuv not used by this function
+ */
+static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_src[4],
+                                                int width, int is_be, int32_t *rgb2yuv)
+{
+    const float *alpha = (const float *)_src[3];
+    uint16_t *out = (uint16_t *)_dst;
+    int x;
+
+    for (x = 0; x < width; x++)
+        out[x] = av_clip_uint16(lrintf(65535.0f * rdpx(alpha + x)));
+}
+
+/**
+ * Convert one line of planar float RGB to 16-bit U and V samples.
+ *
+ * Each component is read with rdpx() (byte order selected by is_be), scaled
+ * by 65535, rounded and clipped to 16 bits before the rgb2yuv coefficients
+ * are applied.
+ *
+ * @param _dstU   output U line, written as uint16_t
+ * @param _dstV   output V line, written as uint16_t
+ * @param _src    source planes; [0] = G, [1] = B, [2] = R
+ * @param width   number of samples to convert
+ * @param is_be   non-zero when the source floats are big-endian
+ * @param rgb2yuv coefficient table, read at RU_IDX/GU_IDX/BU_IDX and
+ *                RV_IDX/GV_IDX/BV_IDX
+ */
+static av_always_inline void planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV,
+                                                 const uint8_t *_src[4], int width,
+                                                 int is_be, int32_t *rgb2yuv)
+{
+    const float *gplane = (const float *)_src[0];
+    const float *bplane = (const float *)_src[1];
+    const float *rplane = (const float *)_src[2];
+    uint16_t *outU = (uint16_t *)_dstU;
+    uint16_t *outV = (uint16_t *)_dstV;
+    const int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+    const int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+    const int bpc = 16;
+    const int shift = 14;
+    int x;
+
+    for (x = 0; x < width; x++) {
+        const int g = av_clip_uint16(lrintf(65535.0f * rdpx(gplane + x)));
+        const int b = av_clip_uint16(lrintf(65535.0f * rdpx(bplane + x)));
+        const int r = av_clip_uint16(lrintf(65535.0f * rdpx(rplane + x)));
+
+        /* weighted sum plus a constant offset, then scaled down by the shift */
+        outU[x] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
+        outV[x] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
+    }
+}
+
+/**
+ * Convert one line of planar float RGB to 16-bit luma samples.
+ *
+ * Each component is read with rdpx() (byte order selected by is_be), scaled
+ * by 65535, rounded and clipped to 16 bits before the rgb2yuv coefficients
+ * are applied.
+ *
+ * @param _dst    output line, written as uint16_t
+ * @param _src    source planes; [0] = G, [1] = B, [2] = R
+ * @param width   number of samples to convert
+ * @param is_be   non-zero when the source floats are big-endian
+ * @param rgb2yuv coefficient table, read at RY_IDX/GY_IDX/BY_IDX
+ */
+static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_src[4],
+                                                int width, int is_be, int32_t *rgb2yuv)
+{
+    const float *gplane = (const float *)_src[0];
+    const float *bplane = (const float *)_src[1];
+    const float *rplane = (const float *)_src[2];
+    uint16_t *out = (uint16_t *)_dst;
+    const int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+    const int bpc = 16;
+    const int shift = 14;
+    int x;
+
+    for (x = 0; x < width; x++) {
+        const int g = av_clip_uint16(lrintf(65535.0f * rdpx(gplane + x)));
+        const int b = av_clip_uint16(lrintf(65535.0f * rdpx(bplane + x)));
+        const int r = av_clip_uint16(lrintf(65535.0f * rdpx(rplane + x)));
+
+        /* weighted sum plus a constant offset, then scaled down by the shift */
+        out[x] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14));
+    }
+}
+
+#undef rdpx
+
 static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t 
*_src, const uint8_t *unused1,
 const uint8_t *unused2, int width, 
uint32_t *unused)
 {
@@ -1022,6 +1075,26 @@ rgb9plus_planar_transparency_funcs(10)
 rgb9plus_planar_transparency_funcs(12)
 rgb9plus_planar_transparency_funcs(16)
 
+/*
+ * Generate the fixed-endianness wrappers around planar_rgbf32_to_y(),
+ * planar_rgbf32_to_uv() and planar_rgbf32_to_a(): endian_name is pasted
+ * into the wrapper names and endian is forwarded as the is_be argument.
+ */
+#define rgbf32_planar_funcs_endian(endian_name, endian) \
+static void planar_rgbf32##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \
+                                              int w, int32_t *rgb2yuv) \
+{ \
+    planar_rgbf32_to_y(dst, src, w, endian, rgb2yuv); \
+} \
+static void planar_rgbf32##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \
+                                               const uint8_t *src[4], int w, \
+                                               int32_t *rgb2yuv) \
+{ \
+    planar_rgbf32_to_uv(dstU, dstV, src, w, endian, rgb2yuv); \
+} \
+static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4], \
+                                              int w, int32_t *rgb2yuv) \
+{ \
+    planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv); \
+}
+
+/* little-endian (is_be = 0) and big-endian (is_be = 1) variants */
+rgbf32_planar_funcs_endian(le, 0)
+rgbf32_planar_funcs_endian(be, 1)
+
 av_cold void ff_sws_init_input_funcs(SwsContext *c)
 {
 enum AVPixelFormat srcFormat = c->srcFormat;
@@ -1070,6 +1143,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_GBRP16LE:
 c->readChrPlanar = planar_rgb16le_to_uv;
 break;
+case AV_PIX_FMT_GBRAPF32LE:
+case AV_PIX_FMT_GBRPF32LE:
+c->readChrPlanar = planar_rgbf32le_to_uv;
+break;
 case AV_PIX_F

[FFmpeg-devel] [PATCH v2 0/2] initial input/output support for AV_PIX_FMT_GBRAPF32

2020-05-03 Thread mindmark

From: Mark Reid 

changes since v1
- added missing fillPlane32 function
- tests should pass now for qemu-mips
- removed exr patch for now

Mark Reid (2):
  libswscale: add input support AV_PIX_FMT_GBRAPF32
  libswscale: add output support for AV_PIX_FMT_GBRAPF32

 libswscale/input.c   | 91 
 libswscale/output.c  | 82 +
 libswscale/slice.c   | 28 +---
 libswscale/swscale.c |  5 ++
 libswscale/swscale_internal.h| 36 ++
 libswscale/swscale_unscaled.c| 33 +
 libswscale/utils.c   |  4 ++
 tests/ref/fate/filter-pixdesc-gbrapf32be |  1 +
 tests/ref/fate/filter-pixdesc-gbrapf32le |  1 +
 tests/ref/fate/filter-pixdesc-gbrpf32be  |  1 +
 tests/ref/fate/filter-pixdesc-gbrpf32le  |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 21 files changed, 312 insertions(+), 11 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32le

--
2.25.0
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 3/3] avcodec/exr: add option to output pixels in float

2020-04-28 Thread mindmark

From: Mark Reid 

---
 libavcodec/exr.c | 103 +++
 1 file changed, 86 insertions(+), 17 deletions(-)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 73419eadb1..f86e97a433 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -161,6 +161,7 @@ typedef struct EXRContext {
 enum AVColorTransferCharacteristic apply_trc_type;
 float gamma;
 uint16_t gamma_table[65536];
+int output_float;
 } EXRContext;
 
 /* -15 stored using a single precision bias of 127 */
@@ -1035,14 +1036,14 @@ static int decode_block(AVCodecContext *avctx, void 
*tdata,
 const uint8_t *channel_buffer[4] = { 0 };
 const uint8_t *buf = s->buf;
 uint64_t line_offset, uncompressed_size;
-uint16_t *ptr_x;
 uint8_t *ptr;
 uint32_t data_size;
 uint64_t line, col = 0;
 uint64_t tile_x, tile_y, tile_level_x, tile_level_y;
 const uint8_t *src;
-int axmax = (avctx->width - (s->xmax + 1)) * 2 * s->desc->nb_components; 
/* nb pixel to add at the right of the datawindow */
-int bxmin = s->xmin * 2 * s->desc->nb_components; /* nb pixel to add at 
the left of the datawindow */
+int step = s->desc->flags & AV_PIX_FMT_FLAG_FLOAT ? 4 : 2 * 
s->desc->nb_components;
+int axmax = (avctx->width - (s->xmax + 1)) * step; /* nb pixel to add at 
the right of the datawindow */
+int bxmin = s->xmin * step; /* nb pixel to add at the left of the 
datawindow */
 int i, x, buf_size = s->buf_size;
 int c, rgb_channel_count;
 float one_gamma = 1.0f / s->gamma;
@@ -1175,6 +1176,58 @@ static int decode_block(AVCodecContext *avctx, void 
*tdata,
 if (s->channel_offsets[3] >= 0)
 channel_buffer[3] = src + td->xsize * s->channel_offsets[3];
 
+if (s->desc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+
+/* todo: change this when a floating point pixel format with luma with 
alpha is implemented */
+int channel_count = s->channel_offsets[3] >= 0 ? 4 : rgb_channel_count;
+if (s->is_luma) {
+channel_buffer[1] = channel_buffer[0];
+channel_buffer[2] = channel_buffer[0];
+}
+
+for (c = 0; c < channel_count; c++) {
+int plane = s->desc->comp[c].plane;
+ptr = p->data[plane] + line * p->linesize[plane] + (col * 4);
+
+for (i = 0; i < td->ysize; i++, ptr += p->linesize[plane]) {
+const uint8_t *src;
+union av_intfloat32 *ptr_x;
+
+src = channel_buffer[c];
+ptr_x = (union av_intfloat32 *)ptr;
+
+// Zero out the start if xmin is not 0
+memset(ptr_x, 0, bxmin);
+ptr_x += s->xmin;
+
+if (s->pixel_type == EXR_FLOAT) {
+// 32-bit
+for (x = 0; x < td->xsize; x++) {
+ptr_x->i = bytestream_get_le32(&src);
+ptr_x++;
+}
+} else if (s->pixel_type == EXR_HALF) {
+// 16-bit
+for (x = 0; x < td->xsize; x++) {
+*ptr_x++ = exr_half2float(bytestream_get_le16(&src));
+}
+} else if (s->pixel_type == EXR_UINT) {
+const float float_mult = 1.0f / (float)UINT32_MAX;
+for (x = 0; x < td->xsize; x++) {
+ptr_x->f = float_mult * 
(float)bytestream_get_le32(&src);
+ptr_x++;
+}
+}
+
+// Zero out the end if xmax+1 is not w
+memset(ptr_x, 0, axmax);
+channel_buffer[c] += td->channel_line_size;
+}
+}
+
+return 0;
+}
+
 ptr = p->data[0] + line * p->linesize[0] + (col * s->desc->nb_components * 
2);
 
 for (i = 0;
@@ -1182,6 +1235,7 @@ static int decode_block(AVCodecContext *avctx, void 
*tdata,
 
 const uint8_t * a;
 const uint8_t *rgb[3];
+uint16_t *ptr_x;
 
 for (c = 0; c < rgb_channel_count; c++) {
 rgb[c] = channel_buffer[c];
@@ -1676,7 +1730,8 @@ static int decode_frame(AVCodecContext *avctx, void *data,
 AVFrame *picture = data;
 uint8_t *ptr;
 
-int y, ret;
+int i, y, ret;
+int planes;
 int out_line_size;
 int nb_blocks;   /* nb scanline or nb tile */
 uint64_t start_offset_table;
@@ -1694,15 +1749,16 @@ static int decode_frame(AVCodecContext *avctx, void 
*data,
 case EXR_UINT:
 if (s->channel_offsets[3] >= 0) {
 if (!s->is_luma) {
-avctx->pix_fmt = AV_PIX_FMT_RGBA64;
+avctx->pix_fmt = s->output_float ? AV_PIX_FMT_GBRAPF32 : 
AV_PIX_FMT_RGBA64;
 } else {
-avctx->pix_fmt = AV_PIX_FMT_YA16;
+/* todo: change this when a floating point pixel format with 
luma with alpha is implemented */
+avctx->pix_fmt = s->output_float ? AV_PIX_FMT_GBRA

[FFmpeg-devel] [PATCH 1/3] libswscale: add input support AV_PIX_FMT_GBRAPF32

2020-04-28 Thread mindmark

From: Mark Reid 

---
 libswscale/input.c | 91 ++
 libswscale/utils.c |  4 ++
 2 files changed, 95 insertions(+)

diff --git a/libswscale/input.c b/libswscale/input.c
index 099661cb6d..e74cf04133 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -960,6 +960,59 @@ static av_always_inline void planar_rgb16_to_uv(uint8_t 
*_dstU, uint8_t *_dstV,
 }
 #undef rdpx
 
+#define rdpx(src) (is_be ? av_int2float(AV_RB32(src)): 
av_int2float(AV_RL32(src)))
+
+/**
+ * Convert the alpha plane of one planar float RGB line to 16-bit samples.
+ *
+ * Each float is read with rdpx() (byte order selected by is_be), scaled by
+ * 65535, rounded and clipped to the uint16_t range.
+ *
+ * @param _dst    output line, written as uint16_t
+ * @param _src    source planes; only _src[3] is read
+ * @param width   number of samples to convert
+ * @param is_be   non-zero when the source floats are big-endian
+ * @param rgb2yuv not used by this function
+ */
+static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_src[4],
+                                                int width, int is_be, int32_t *rgb2yuv)
+{
+    const float *alpha = (const float *)_src[3];
+    uint16_t *out = (uint16_t *)_dst;
+    int x;
+
+    for (x = 0; x < width; x++)
+        out[x] = av_clip_uint16(lrintf(65535.0f * rdpx(alpha + x)));
+}
+
+/**
+ * Convert one line of planar float RGB to 16-bit U and V samples.
+ *
+ * Each component is read with rdpx() (byte order selected by is_be), scaled
+ * by 65535, rounded and clipped to 16 bits before the rgb2yuv coefficients
+ * are applied.
+ *
+ * @param _dstU   output U line, written as uint16_t
+ * @param _dstV   output V line, written as uint16_t
+ * @param _src    source planes; [0] = G, [1] = B, [2] = R
+ * @param width   number of samples to convert
+ * @param is_be   non-zero when the source floats are big-endian
+ * @param rgb2yuv coefficient table, read at RU_IDX/GU_IDX/BU_IDX and
+ *                RV_IDX/GV_IDX/BV_IDX
+ */
+static av_always_inline void planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV,
+                                                 const uint8_t *_src[4], int width,
+                                                 int is_be, int32_t *rgb2yuv)
+{
+    const float *gplane = (const float *)_src[0];
+    const float *bplane = (const float *)_src[1];
+    const float *rplane = (const float *)_src[2];
+    uint16_t *outU = (uint16_t *)_dstU;
+    uint16_t *outV = (uint16_t *)_dstV;
+    const int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+    const int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+    const int bpc = 16;
+    const int shift = 14;
+    int x;
+
+    for (x = 0; x < width; x++) {
+        const int g = av_clip_uint16(lrintf(65535.0f * rdpx(gplane + x)));
+        const int b = av_clip_uint16(lrintf(65535.0f * rdpx(bplane + x)));
+        const int r = av_clip_uint16(lrintf(65535.0f * rdpx(rplane + x)));
+
+        /* weighted sum plus a constant offset, then scaled down by the shift */
+        outU[x] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
+        outV[x] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
+    }
+}
+
+/**
+ * Convert one line of planar float RGB to 16-bit luma samples.
+ *
+ * Each component is read with rdpx() (byte order selected by is_be), scaled
+ * by 65535, rounded and clipped to 16 bits before the rgb2yuv coefficients
+ * are applied.
+ *
+ * @param _dst    output line, written as uint16_t
+ * @param _src    source planes; [0] = G, [1] = B, [2] = R
+ * @param width   number of samples to convert
+ * @param is_be   non-zero when the source floats are big-endian
+ * @param rgb2yuv coefficient table, read at RY_IDX/GY_IDX/BY_IDX
+ */
+static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_src[4],
+                                                int width, int is_be, int32_t *rgb2yuv)
+{
+    const float *gplane = (const float *)_src[0];
+    const float *bplane = (const float *)_src[1];
+    const float *rplane = (const float *)_src[2];
+    uint16_t *out = (uint16_t *)_dst;
+    const int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+    const int bpc = 16;
+    const int shift = 14;
+    int x;
+
+    for (x = 0; x < width; x++) {
+        const int g = av_clip_uint16(lrintf(65535.0f * rdpx(gplane + x)));
+        const int b = av_clip_uint16(lrintf(65535.0f * rdpx(bplane + x)));
+        const int r = av_clip_uint16(lrintf(65535.0f * rdpx(rplane + x)));
+
+        /* weighted sum plus a constant offset, then scaled down by the shift */
+        out[x] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14));
+    }
+}
+
+#undef rdpx
+
 static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t 
*_src, const uint8_t *unused1,
 const uint8_t *unused2, int width, 
uint32_t *unused)
 {
@@ -1022,6 +1075,26 @@ rgb9plus_planar_transparency_funcs(10)
 rgb9plus_planar_transparency_funcs(12)
 rgb9plus_planar_transparency_funcs(16)
 
+#define rgbf32_planar_funcs_endian(endian_name, endian)
 \
+static void planar_rgbf32##endian_name##_to_y(uint8_t *dst, const uint8_t 
*src[4],  \
+  int w, int32_t *rgb2yuv) 
 \
+{  
 \
+planar_rgbf32_to_y(dst, src, w, endian, rgb2yuv);  
 \
+}  
 \
+static void planar_rgbf32##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV,   
 \
+   const uint8_t *src[4], int 
w, int32_t *rgb2yuv)  \
+{  
 \
+planar_rgbf32_to_uv(dstU, dstV, src, w, endian, rgb2yuv);  
 \
+}  
 \
+static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t 
*src[4],  \
+  int w, int32_t *rgb2yuv) 
 \
+{  
 \
+planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv);  
 \
+}
+
+rgbf32_planar_funcs_endian(le, 0)
+rgbf32_planar_funcs_endian(be, 1)
+
 av_cold void ff_sws_init_input_funcs(SwsContext *c)
 {
 enum AVPixelFormat srcFormat = c->srcFormat;
@@ -1070,6 +1143,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 case AV_PIX_FMT_GBRP16LE:
 c->readChrPlanar = planar_rgb16le_to_uv;
 break;
+case AV_PIX_FMT_GBRAPF32LE:
+case AV_PIX_FMT_GBRPF32LE:
+c->readChrPlanar = planar_rgbf32le_to_uv;
+break;
 case AV_PIX_F

[FFmpeg-devel] [PATCH 2/3] libswscale: add output support for AV_PIX_FMT_GBRAPF32

2020-04-28 Thread mindmark

From: Mark Reid 

---
 libswscale/output.c  | 82 
 libswscale/slice.c   | 28 
 libswscale/swscale_unscaled.c| 33 ++
 libswscale/utils.c   |  8 +--
 tests/ref/fate/filter-pixdesc-gbrapf32be |  1 +
 tests/ref/fate/filter-pixdesc-gbrapf32le |  1 +
 tests/ref/fate/filter-pixdesc-gbrpf32be  |  1 +
 tests/ref/fate/filter-pixdesc-gbrpf32le  |  1 +
 tests/ref/fate/filter-pixfmts-copy   |  4 ++
 tests/ref/fate/filter-pixfmts-crop   |  4 ++
 tests/ref/fate/filter-pixfmts-field  |  4 ++
 tests/ref/fate/filter-pixfmts-fieldorder |  4 ++
 tests/ref/fate/filter-pixfmts-hflip  |  4 ++
 tests/ref/fate/filter-pixfmts-il |  4 ++
 tests/ref/fate/filter-pixfmts-null   |  4 ++
 tests/ref/fate/filter-pixfmts-scale  |  4 ++
 tests/ref/fate/filter-pixfmts-transpose  |  4 ++
 tests/ref/fate/filter-pixfmts-vflip  |  4 ++
 18 files changed, 180 insertions(+), 15 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32le

diff --git a/libswscale/output.c b/libswscale/output.c
index 68f43ffba3..e864e515d0 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2312,6 +2312,82 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t 
*lumFilter,
 }
 }
 
+static void
+yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+const int16_t **lumSrcx, int lumFilterSize,
+const int16_t *chrFilter, const int16_t **chrUSrcx,
+const int16_t **chrVSrcx, int chrFilterSize,
+const int16_t **alpSrcx, uint8_t **dest,
+int dstW, int y)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+int i;
+int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx;
+uint32_t **dest32 = (uint32_t**)dest;
+const int32_t **lumSrc  = (const int32_t**)lumSrcx;
+const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+static const float float_mult = 1.0f / 65535.0f;
+
+for (i = 0; i < dstW; i++) {
+int j;
+int Y = -0x4000;
+int U = -(128 << 23);
+int V = -(128 << 23);
+int R, G, B, A;
+
+for (j = 0; j < lumFilterSize; j++)
+Y += lumSrc[j][i] * (unsigned)lumFilter[j];
+
+for (j = 0; j < chrFilterSize; j++) {
+U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+}
+
+Y >>= 14;
+Y += 0x1;
+U >>= 14;
+V >>= 14;
+
+if (hasAlpha) {
+A = -0x4000;
+
+for (j = 0; j < lumFilterSize; j++)
+A += alpSrc[j][i] * (unsigned)lumFilter[j];
+
+A >>= 1;
+A += 0x20002000;
+}
+
+Y -= c->yuv2rgb_y_offset;
+Y *= c->yuv2rgb_y_coeff;
+Y += 1 << 13;
+R = V * c->yuv2rgb_v2r_coeff;
+G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+B =U * c->yuv2rgb_u2b_coeff;
+
+R = av_clip_uintp2(Y + R, 30);
+G = av_clip_uintp2(Y + G, 30);
+B = av_clip_uintp2(Y + B, 30);
+
+dest32[0][i] = av_float2int(float_mult * (float)(G >> 14));
+dest32[1][i] = av_float2int(float_mult * (float)(B >> 14));
+dest32[2][i] = av_float2int(float_mult * (float)(R >> 14));
+if (hasAlpha)
+dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 
30) >> 14));
+}
+if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+for (i = 0; i < dstW; i++) {
+dest32[0][i] = av_bswap32(dest32[0][i]);
+dest32[1][i] = av_bswap32(dest32[1][i]);
+dest32[2][i] = av_bswap32(dest32[2][i]);
+if (hasAlpha)
+dest32[3][i] = av_bswap32(dest32[3][i]);
+}
+}
+}
+
 static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
 const int16_t *ubuf[2], const int16_t *vbuf[2],
@@ -2716,6 +2792,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 case AV_PIX_FMT_GBRAP16LE:
 *yuv2anyX = yuv2gbrp16_full_X_c;
 break;
+case AV_PIX_FMT_GBRPF32BE:
+case AV_PIX_FMT_GBRPF32LE:
+case AV_PIX_FMT_GBRAPF32BE:
+case AV_PIX_FMT_GBRAPF32LE:
+*yuv2anyX = yuv2gbrpf32_full_X_c;
+break;
 }
 if (!*yuv2packedX && !*yuv2anyX)
 goto YUV_PACKED;
diff --git a/libswscale/slice.c b/libswscale/slice.c
index db4fa874ff..83b2bba443 100644
--- a/libswscale/slice.c
+++ b/libswscale/slice.c
@@ -189,23 +189,26 @@ int ff_init_slice_from_src(SwsSlice

[FFmpeg-devel] [PATCH 0/3] libswscale: initial input/output support for AV_PIX_FMT_GBRAPF32

2020-04-28 Thread mindmark

From: Mark Reid 

Hi,
The following patches add initial input/output support for planar RGB
floating-point pixel formats in libswscale and add a decoder option to exr
to output as float.

Mark Reid (3):
  libswscale: add input support AV_PIX_FMT_GBRAPF32
  libswscale: add output support for AV_PIX_FMT_GBRAPF32
  avcodec/exr: add option to output pixels in float

 libavcodec/exr.c | 103 +++
 libswscale/input.c   |  91 
 libswscale/output.c  |  82 ++
 libswscale/slice.c   |  28 +++---
 libswscale/swscale_unscaled.c|  33 
 libswscale/utils.c   |   4 +
 tests/ref/fate/filter-pixdesc-gbrapf32be |   1 +
 tests/ref/fate/filter-pixdesc-gbrapf32le |   1 +
 tests/ref/fate/filter-pixdesc-gbrpf32be  |   1 +
 tests/ref/fate/filter-pixdesc-gbrpf32le  |   1 +
 tests/ref/fate/filter-pixfmts-copy   |   4 +
 tests/ref/fate/filter-pixfmts-crop   |   4 +
 tests/ref/fate/filter-pixfmts-field  |   4 +
 tests/ref/fate/filter-pixfmts-fieldorder |   4 +
 tests/ref/fate/filter-pixfmts-hflip  |   4 +
 tests/ref/fate/filter-pixfmts-il |   4 +
 tests/ref/fate/filter-pixfmts-null   |   4 +
 tests/ref/fate/filter-pixfmts-scale  |   4 +
 tests/ref/fate/filter-pixfmts-transpose  |   4 +
 tests/ref/fate/filter-pixfmts-vflip  |   4 +
 20 files changed, 357 insertions(+), 28 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrapf32le
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32be
 create mode 100644 tests/ref/fate/filter-pixdesc-gbrpf32le

--
2.25.0
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] fate/exr: add test for YA16

2020-04-26 Thread mindmark

From: Mark Reid 

Hi, I noticed there was no FATE test for this supported exr format.

Here is the test file to add to FATE:
https://www.dropbox.com/s/urzus0svmq1oizd/ya_scanline_zip_half_12x8.exr?dl=0

---
 tests/fate/image.mak | 3 +++
 tests/ref/fate/exr-ya-scanline-zip-half-12x8 | 6 ++
 2 files changed, 9 insertions(+)
 create mode 100644 tests/ref/fate/exr-ya-scanline-zip-half-12x8

diff --git a/tests/fate/image.mak b/tests/fate/image.mak
index 121405aab9..f65119bffc 100644
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak
@@ -281,6 +281,9 @@ fate-exr-y-tile-zip-half-12x8: CMD = framecrc -i 
$(TARGET_SAMPLES)/exr/y_tile_zi
 FATE_EXR += fate-exr-y-scanline-zip-half-12x8
 fate-exr-y-scanline-zip-half-12x8: CMD = framecrc -i 
$(TARGET_SAMPLES)/exr/y_scanline_zip_half_12x8.exr -pix_fmt gray16le

+FATE_EXR += fate-exr-ya-scanline-zip-half-12x8
+fate-exr-ya-scanline-zip-half-12x8: CMD = framecrc -i 
$(TARGET_SAMPLES)/exr/ya_scanline_zip_half_12x8.exr -pix_fmt ya16le
+
 FATE_EXR += fate-exr-rgb-scanline-half-piz-dw-t08
 fate-exr-rgb-scanline-half-piz-dw-t08: CMD = framecrc -i 
$(TARGET_SAMPLES)/exr/rgb_scanline_half_piz_dw_t08.exr -pix_fmt rgb48le

diff --git a/tests/ref/fate/exr-ya-scanline-zip-half-12x8 
b/tests/ref/fate/exr-ya-scanline-zip-half-12x8
new file mode 100644
index 00..f166396164
--- /dev/null
+++ b/tests/ref/fate/exr-ya-scanline-zip-half-12x8
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 12x8
+#sar 0: 1/1
+0,  0,  0,1,  384, 0xcb9148cc
--
2.25.0
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2] avcodec/exr: add cineon lin2log trc

2020-03-05 Thread mindmark

From: Mark Reid 

Hi,
The following patch adds a Cineon lin2log color transfer characteristic to exr.
The purpose of this patch is to preserve the dynamic range of an exr file
when converting to DPX or when using video filters such as 3D LUTs.
I wasn't sure if adding it to the AVColorTransferCharacteristic enum was the
correct approach, as this might be an exr-specific thing, but I figured it was
a good starting point.

changes since v1:
- updated fate test

---
 libavcodec/exr.c   |  2 ++
 libavutil/color_utils.c| 14 ++
 libavutil/pixfmt.h |  1 +
 tests/ref/fate/color_utils | 19 +++
 4 files changed, 36 insertions(+)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 1db30a1ae0..f2900a7921 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -1938,6 +1938,8 @@ static const AVOption options[] = {
 AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST2084 },  INT_MIN, INT_MAX, 
VD, "apply_trc_type"},
 { "smpte428_1",   "SMPTE ST 428-1",   0,
 AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST428_1 }, INT_MIN, INT_MAX, 
VD, "apply_trc_type"},
+{ "lin2log",  "Default Cineon/DPX log",   0,
+AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_CINE_LIN2LOG }, INT_MIN, INT_MAX, 
VD, "apply_trc_type"},

 { NULL },
 };
diff --git a/libavutil/color_utils.c b/libavutil/color_utils.c
index eb8bc7b5fc..e33c019d4a 100644
--- a/libavutil/color_utils.c
+++ b/libavutil/color_utils.c
@@ -167,6 +167,16 @@ static double avpriv_trc_arib_std_b67(double Lc) {
 (Lc <= 1.0 / 12.0 ? sqrt(3.0 * Lc) : a * log(12.0 * Lc - b) + c);
 }

+static double avpriv_trc_cine_lin2log(double Lc) {
+const double blackpoint =  95.0;
+const double whitepoint = 685.0;
+const double gamma  =   0.6;
+const double offset =  pow(10, (blackpoint - whitepoint) * 0.002 / gamma);
+const double gain   = 1.0 / (1.0 - offset);
+
+return (log10((Lc + offset) / gain) / (0.002 / gamma) + whitepoint ) / 
1023.0;
+}
+
 avpriv_trc_function avpriv_get_trc_function_from_trc(enum 
AVColorTransferCharacteristic trc)
 {
 avpriv_trc_function func = NULL;
@@ -225,6 +235,10 @@ avpriv_trc_function avpriv_get_trc_function_from_trc(enum 
AVColorTransferCharact
 func = avpriv_trc_arib_std_b67;
 break;

+case AVCOL_TRC_CINE_LIN2LOG:
+func = avpriv_trc_cine_lin2log;
+break;
+
 case AVCOL_TRC_RESERVED0:
 case AVCOL_TRC_UNSPECIFIED:
 case AVCOL_TRC_RESERVED:
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 1c625cfc8a..1f3f9988d7 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -499,6 +499,7 @@ enum AVColorTransferCharacteristic {
 AVCOL_TRC_SMPTE428 = 17, ///< SMPTE ST 428-1
 AVCOL_TRC_SMPTEST428_1 = AVCOL_TRC_SMPTE428,
 AVCOL_TRC_ARIB_STD_B67 = 18, ///< ARIB STD-B67, known as "Hybrid log-gamma"
+AVCOL_TRC_CINE_LIN2LOG = 19, ///< Default Cineon/DPX linear to log 1D curve
 AVCOL_TRC_NB ///< Not part of ABI
 };

diff --git a/tests/ref/fate/color_utils b/tests/ref/fate/color_utils
index 10f8055916..41221c131a 100644
--- a/tests/ref/fate/color_utils
+++ b/tests/ref/fate/color_utils
@@ -302,3 +302,22 @@ AVColorTransferCharacteristic=18 calling 
func(15123.456700) expected=2.725380
 AVColorTransferCharacteristic=18 calling func(19845.889230) expected=2.773978
 AVColorTransferCharacteristic=18 calling func(98678.423100) expected=3.060803
 AVColorTransferCharacteristic=18 calling func(9.88) expected=3.063182
+AVColorTransferCharacteristic=19 calling func(-0.10) expected=nan
+AVColorTransferCharacteristic=19 calling func(-0.018054) expected=nan
+AVColorTransferCharacteristic=19 calling func(-0.01) expected=-0.240327
+AVColorTransferCharacteristic=19 calling func(-0.004490) expected=0.023018
+AVColorTransferCharacteristic=19 calling func(0.00) expected=0.091481
+AVColorTransferCharacteristic=19 calling func(0.003162) expected=0.124195
+AVColorTransferCharacteristic=19 calling func(0.005000) expected=0.139945
+AVColorTransferCharacteristic=19 calling func(0.009000) expected=0.168690
+AVColorTransferCharacteristic=19 calling func(0.015000) expected=0.202405
+AVColorTransferCharacteristic=19 calling func(0.10) expected=0.388020
+AVColorTransferCharacteristic=19 calling func(1.00) expected=0.669584
+AVColorTransferCharacteristic=19 calling func(52.37) expected=1.172373
+AVColorTransferCharacteristic=19 calling func(125.098765) expected=1.283258
+AVColorTransferCharacteristic=19 calling func(1999.111230) expected=1.636205
+AVColorTransferCharacteristic=19 calling func(6945.443000) expected=1.794815
+AVColorTransferCharacteristic=19 calling func(15123.456700) expected=1.893921
+AVColorTransferCharacteristic=19 calling func(19845.889230) expected=1.928531
+AVColorTransferCharacteristic=19 calling func(98678.423100) expected=2.132798
+AVColorTransferCharacteristic=19 calling func(9.88) expected=2.134492
--
2.2

[FFmpeg-devel] [PATCH] avcodec/exr: add cineon lin2log trc

2020-03-05 Thread mindmark

From: Mark Reid 

Hi,
The following patch adds a Cineon lin2log color transfer characteristic to exr.
The purpose of this patch is to preserve the dynamic range of an exr file
when converting to DPX or when using video filters such as 3D LUTs.
I wasn't sure if adding it to the AVColorTransferCharacteristic enum was the
correct approach, as this might be an exr-specific thing, but I figured it was
a good starting point.

---
 libavcodec/exr.c|  2 ++
 libavutil/color_utils.c | 14 ++
 libavutil/pixfmt.h  |  1 +
 3 files changed, 17 insertions(+)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 1db30a1ae0..f2900a7921 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -1938,6 +1938,8 @@ static const AVOption options[] = {
 AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST2084 },  INT_MIN, INT_MAX, 
VD, "apply_trc_type"},
 { "smpte428_1",   "SMPTE ST 428-1",   0,
 AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST428_1 }, INT_MIN, INT_MAX, 
VD, "apply_trc_type"},
+{ "lin2log",  "Default Cineon/DPX log",   0,
+AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_CINE_LIN2LOG }, INT_MIN, INT_MAX, 
VD, "apply_trc_type"},

 { NULL },
 };
diff --git a/libavutil/color_utils.c b/libavutil/color_utils.c
index eb8bc7b5fc..e33c019d4a 100644
--- a/libavutil/color_utils.c
+++ b/libavutil/color_utils.c
@@ -167,6 +167,16 @@ static double avpriv_trc_arib_std_b67(double Lc) {
 (Lc <= 1.0 / 12.0 ? sqrt(3.0 * Lc) : a * log(12.0 * Lc - b) + c);
 }

+static double avpriv_trc_cine_lin2log(double Lc) {
+const double blackpoint =  95.0;
+const double whitepoint = 685.0;
+const double gamma  =   0.6;
+const double offset =  pow(10, (blackpoint - whitepoint) * 0.002 / gamma);
+const double gain   = 1.0 / (1.0 - offset);
+
+return (log10((Lc + offset) / gain) / (0.002 / gamma) + whitepoint ) / 
1023.0;
+}
+
 avpriv_trc_function avpriv_get_trc_function_from_trc(enum 
AVColorTransferCharacteristic trc)
 {
 avpriv_trc_function func = NULL;
@@ -225,6 +235,10 @@ avpriv_trc_function avpriv_get_trc_function_from_trc(enum 
AVColorTransferCharact
 func = avpriv_trc_arib_std_b67;
 break;

+case AVCOL_TRC_CINE_LIN2LOG:
+func = avpriv_trc_cine_lin2log;
+break;
+
 case AVCOL_TRC_RESERVED0:
 case AVCOL_TRC_UNSPECIFIED:
 case AVCOL_TRC_RESERVED:
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 1c625cfc8a..1f3f9988d7 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -499,6 +499,7 @@ enum AVColorTransferCharacteristic {
 AVCOL_TRC_SMPTE428 = 17, ///< SMPTE ST 428-1
 AVCOL_TRC_SMPTEST428_1 = AVCOL_TRC_SMPTE428,
 AVCOL_TRC_ARIB_STD_B67 = 18, ///< ARIB STD-B67, known as "Hybrid log-gamma"
+AVCOL_TRC_CINE_LIN2LOG = 19, ///< Default Cineon/DPX linear to log 1D curve
 AVCOL_TRC_NB ///< Not part of ABI
 };

--
2.21.0
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH v2 2/2] fate/mxf: add mxf user comments tests

2019-03-11 Thread mindmark

From: Mark Reid 

---
 tests/fate/mxf.mak  | 15 ++-
 tests/ref/fate/mxf-d10-user-comments|  1 +
 tests/ref/fate/mxf-opatom-user-comments |  1 +
 tests/ref/fate/mxf-user-comments|  1 +
 4 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 tests/ref/fate/mxf-d10-user-comments
 create mode 100644 tests/ref/fate/mxf-opatom-user-comments
 create mode 100644 tests/ref/fate/mxf-user-comments

diff --git a/tests/fate/mxf.mak b/tests/fate/mxf.mak
index dce23d522e..62e4ec01cb 100644
--- a/tests/fate/mxf.mak
+++ b/tests/fate/mxf.mak
@@ -37,9 +37,22 @@ FATE_MXF_REEL_NAME-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, 
MXF) += fate-mxf-reel_
 fate-mxf-reel_name: $(TARGET_SAMPLES)/mxf/Sony-1.mxf
 fate-mxf-reel_name: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-1.mxf  -c 
copy -timecode 00:00:00:00 -metadata "reel_name=test_reel" -fflags +bitexact -f 
mxf
 
+FATE_MXF_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += 
fate-mxf-user-comments
+fate-mxf-user-comments: $(TARGET_SAMPLES)/mxf/Sony-1.mxf
+fate-mxf-user-comments: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-1.mxf 
-c copy -metadata "comment_test=value" -fflags +bitexact -f mxf
+
+FATE_MXF_D10_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += 
fate-mxf-d10-user-comments
+fate-mxf-d10-user-comments: $(TARGET_SAMPLES)/mxf/Sony-1.mxf
+fate-mxf-d10-user-comments: CMD = md5 -y -i 
$(TARGET_SAMPLES)/mxf/Sony-1.mxf -c copy -metadata "comment_test=value" 
-store_user_comments 1 -fflags +bitexact -f mxf_d10
+
+FATE_MXF_OPATOM_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += 
fate-mxf-opatom-user-comments
+fate-mxf-opatom-user-comments: $(TARGET_SAMPLES)/mxf/Sony-1.mxf
+fate-mxf-opatom-user-comments: CMD = md5 -y -i 
$(TARGET_SAMPLES)/mxf/Sony-1.mxf -an -vcodec copy -metadata 
"comment_test=value" -fflags +bitexact -f mxf_opatom
+
 FATE_MXF-$(CONFIG_MXF_DEMUXER) += $(FATE_MXF)
 
 FATE_SAMPLES_AVCONV += $(FATE_MXF-yes) $(FATE_MXF_REEL_NAME-yes)
+FATE_SAMPLES_AVCONV += $(FATE_MXF_USER_COMMENTS-yes) 
$(FATE_MXF_D10_USER_COMMENTS-yes) $(FATE_MXF_OPATOM_USER_COMMENTS-yes)
 FATE_SAMPLES_FFPROBE += $(FATE_MXF_PROBE-yes)
 
-fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes) $(FATE_MXF_REEL_NAME-yes)
+fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes) $(FATE_MXF_REEL_NAME-yes) 
$(FATE_MXF_USER_COMMENTS-yes) $(FATE_MXF_D10_USER_COMMENTS-yes) 
$(FATE_MXF_OPATOM_USER_COMMENTS-yes)
diff --git a/tests/ref/fate/mxf-d10-user-comments 
b/tests/ref/fate/mxf-d10-user-comments
new file mode 100644
index 00..e78765020c
--- /dev/null
+++ b/tests/ref/fate/mxf-d10-user-comments
@@ -0,0 +1 @@
+b659c1204f8d04e2a5607af083590dca
diff --git a/tests/ref/fate/mxf-opatom-user-comments 
b/tests/ref/fate/mxf-opatom-user-comments
new file mode 100644
index 00..1834b9e074
--- /dev/null
+++ b/tests/ref/fate/mxf-opatom-user-comments
@@ -0,0 +1 @@
+892cf02e44bf7d61b6d6f01e41db9375
diff --git a/tests/ref/fate/mxf-user-comments b/tests/ref/fate/mxf-user-comments
new file mode 100644
index 00..4b734a0f85
--- /dev/null
+++ b/tests/ref/fate/mxf-user-comments
@@ -0,0 +1 @@
+683bacb0105e5bc5bbf46aa430c644d1
-- 
2.21.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH v2 1/2] avformat/mxfenc: allow user comments for opatom muxer

2019-03-11 Thread mindmark

From: Mark Reid 

---
 doc/muxers.texi  | 4 ++--
 libavformat/mxfenc.c | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/muxers.texi b/doc/muxers.texi
index 372fab2f92..aac7d94edf 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -1629,7 +1629,7 @@ ffmpeg -i file.mpg -c copy \
  out.ts
 @end example
 
-@section mxf, mxf_d10
+@section mxf, mxf_d10, mxf_opatom
 
 MXF muxer.
 
@@ -1641,7 +1641,7 @@ The muxer options are:
 @item store_user_comments @var{bool}
 Set if user comments should be stored if available or never.
 IRT D-10 does not allow user comments. The default is thus to write them for
-mxf but not for mxf_d10
+mxf and mxf_opatom but not for mxf_d10
 @end table
 
 @section null
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index 032ee3bf3d..8c6db94865 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -3095,6 +3095,8 @@ static const AVOption opatom_options[] = {
 { "mxf_audio_edit_rate", "Audio edit rate for timecode",
 offsetof(MXFContext, audio_edit_rate), AV_OPT_TYPE_RATIONAL, 
{.dbl=25}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
 MXF_COMMON_OPTIONS
+{ "store_user_comments", "",
+  offsetof(MXFContext, store_user_comments), AV_OPT_TYPE_BOOL, {.i64 = 1}, 
0, 1, AV_OPT_FLAG_ENCODING_PARAM},
 { NULL },
 };
 
-- 
2.21.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] avformat/mxfenc: allow user comments for opatom muxer

2019-03-10 Thread mindmark

From: Mark Reid 

This patch restores the ability to add user comments for the mxf_opatom muxer.
The ability seems to have been disabled in d9726893f31.

---
 doc/muxers.texi  | 2 +-
 libavformat/mxfenc.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/muxers.texi b/doc/muxers.texi
index 372fab2f92..764102bf4b 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -1629,7 +1629,7 @@ ffmpeg -i file.mpg -c copy \
  out.ts
 @end example

-@section mxf, mxf_d10
+@section mxf, mxf_d10, mxf_opatom

 MXF muxer.

diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index 032ee3bf3d..8c6db94865 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -3095,6 +3095,8 @@ static const AVOption opatom_options[] = {
 { "mxf_audio_edit_rate", "Audio edit rate for timecode",
 offsetof(MXFContext, audio_edit_rate), AV_OPT_TYPE_RATIONAL, 
{.dbl=25}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
 MXF_COMMON_OPTIONS
+{ "store_user_comments", "",
+  offsetof(MXFContext, store_user_comments), AV_OPT_TYPE_BOOL, {.i64 = 1}, 
0, 1, AV_OPT_FLAG_ENCODING_PARAM},
 { NULL },
 };

--
2.18.0
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

87 matches

Mail list logo