[FFmpeg-devel] [PATCH] avformat/mpegts: Don't use uninitialized value in av_log()

2024-05-21 Thread Andreas Rheinhardt
It is undefined behaviour in (at least) C11 (see C11 6.3.2.1 (2)).
Fixes Coverity issue #1500314.

Signed-off-by: Andreas Rheinhardt 
---
 libavformat/mpegts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 3a5cb769ba..c66a1ea6ed 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -2189,7 +2189,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, 
AVStream *st, int stream_type
 uint32_t buf;
 AVDOVIDecoderConfigurationRecord *dovi;
 size_t dovi_size;
-int dependency_pid;
+int dependency_pid = -1; // Unset
 
 if (desc_end - *pp < 4) // (8 + 8 + 7 + 6 + 1 + 1 + 1) / 8
 return AVERROR_INVALIDDATA;
-- 
2.40.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 6/6] avformat/dhav: Check amount read

2024-05-21 Thread Andreas Rheinhardt
Prevents potential use of uninitialized data in the following
memcmp().

Signed-off-by: Andreas Rheinhardt 
---
 libavformat/dhav.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavformat/dhav.c b/libavformat/dhav.c
index 8e08274e68..b2ead99609 100644
--- a/libavformat/dhav.c
+++ b/libavformat/dhav.c
@@ -278,7 +278,9 @@ static int dhav_read_header(AVFormatContext *s)
 if (ret < 0)
 return ret;
 
-avio_read(s->pb, signature, sizeof(signature));
+ret = ffio_read_size(s->pb, signature, sizeof(signature));
+if (ret < 0)
+return ret;
 if (!memcmp(signature, "DAHUA", 5)) {
 avio_skip(s->pb, 0x400 - 5);
 dhav->last_good_pos = avio_tell(s->pb);
-- 
2.40.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 5/6] avformat/dhav: Check ffio_ensure_seekback()

2024-05-21 Thread Andreas Rheinhardt
Fixes Coverity issue #1492324.

Signed-off-by: Andreas Rheinhardt 
---
 libavformat/dhav.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavformat/dhav.c b/libavformat/dhav.c
index 303fb42bed..8e08274e68 100644
--- a/libavformat/dhav.c
+++ b/libavformat/dhav.c
@@ -273,8 +273,11 @@ static int dhav_read_header(AVFormatContext *s)
 {
 DHAVContext *dhav = s->priv_data;
 uint8_t signature[5];
+int ret = ffio_ensure_seekback(s->pb, 5);
+
+if (ret < 0)
+return ret;
 
-ffio_ensure_seekback(s->pb, 5);
 avio_read(s->pb, signature, sizeof(signature));
 if (!memcmp(signature, "DAHUA", 5)) {
 avio_skip(s->pb, 0x400 - 5);
-- 
2.40.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 4/6] avformat/qoadec: Check ffio_ensure_seekback()

2024-05-21 Thread Andreas Rheinhardt
Fixes Coverity issue #1598406.

Signed-off-by: Andreas Rheinhardt 
---
 libavformat/qoadec.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavformat/qoadec.c b/libavformat/qoadec.c
index 9cce5157fc..a9632c46c3 100644
--- a/libavformat/qoadec.c
+++ b/libavformat/qoadec.c
@@ -41,6 +41,7 @@ static int qoa_read_header(AVFormatContext *s)
 {
 AVIOContext *pb = s->pb;
 AVStream *st;
+int ret;
 
 st = avformat_new_stream(s, NULL);
 if (!st)
@@ -52,7 +53,9 @@ static int qoa_read_header(AVFormatContext *s)
 st->duration = avio_rb32(pb);
 st->start_time = 0;
 
-ffio_ensure_seekback(pb, 4);
+ret = ffio_ensure_seekback(pb, 4);
+if (ret < 0)
+return ret;
 st->codecpar->ch_layout.nb_channels = avio_r8(pb);
 if (st->codecpar->ch_layout.nb_channels == 0)
 return AVERROR_INVALIDDATA;
-- 
2.40.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 3/6] avformat/westwood_vqa: Check ffio_ensure_seekback()

2024-05-21 Thread Andreas Rheinhardt
Fixes Coverity issue #1598405.

Signed-off-by: Andreas Rheinhardt 
---
 libavformat/westwood_vqa.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavformat/westwood_vqa.c b/libavformat/westwood_vqa.c
index 3a31e3f5e8..9755fcc9c1 100644
--- a/libavformat/westwood_vqa.c
+++ b/libavformat/westwood_vqa.c
@@ -201,8 +201,10 @@ static int wsvqa_read_packet(AVFormatContext *s,
 /* We need a big seekback buffer because there can be SNxx, VIEW 
and ZBUF
  * chunks (<512 KiB total) in the stream before we read VQFR (<256 
KiB) and
  * seek back here. */
-ffio_ensure_seekback(pb, wsvqa->vqfl_chunk_size + (512 + 256) * 
1024);
+ret = ffio_ensure_seekback(pb, wsvqa->vqfl_chunk_size + (512 + 
256) * 1024);
 avio_skip(pb, chunk_size + skip_byte);
+if (ret < 0)
+return ret;
 continue;
 } else if ((chunk_type == SND0_TAG) || (chunk_type == SND1_TAG) ||
 (chunk_type == SND2_TAG) || (chunk_type == VQFR_TAG)) {
-- 
2.40.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/6] avformat/gifdec: Check ffio_ensure_seekback()

2024-05-21 Thread Andreas Rheinhardt
Fixes Coverity issue #1598400.

Signed-off-by: Andreas Rheinhardt 
---
 libavformat/gifdec.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavformat/gifdec.c b/libavformat/gifdec.c
index 294007682b..d5f06adc64 100644
--- a/libavformat/gifdec.c
+++ b/libavformat/gifdec.c
@@ -85,7 +85,10 @@ static int gif_probe(const AVProbeData *p)
 
 static int resync(AVIOContext *pb)
 {
-ffio_ensure_seekback(pb, 13);
+int ret = ffio_ensure_seekback(pb, 13);
+if (ret < 0)
+return ret;
+
 for (int i = 0; i < 6; i++) {
 int b = avio_r8(pb);
 if (b != gif87a_sig[i] && b != gif89a_sig[i])
-- 
2.40.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 1/6] avformat/oggdec: Check ffio_ensure_seekback()

2024-05-21 Thread Andreas Rheinhardt
Fixes Coverity issue #1492327.

Signed-off-by: Andreas Rheinhardt 
---
 libavformat/oggdec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 8ea81e5d45..5339fdd32c 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -364,7 +364,9 @@ static int ogg_read_page(AVFormatContext *s, int *sid, int 
probing)
 ffio_init_checksum(bc, ff_crc04C11DB7_update, 0x4fa9b05f);
 
 /* To rewind if checksum is bad/check magic on switches - this is the max 
packet size */
-ffio_ensure_seekback(bc, MAX_PAGE_SIZE);
+ret = ffio_ensure_seekback(bc, MAX_PAGE_SIZE);
+if (ret < 0)
+return ret;
 start_pos = avio_tell(bc);
 
 version = avio_r8(bc);
-- 
2.40.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v5 1/2][GSoC 2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC

2024-05-21 Thread Andreas Rheinhardt
Stone Chen:
> Implements AVX2 DMVR (decoder-side motion vector refinement) SAD functions. 
> DMVR SAD is only calculated if w >= 8, h >= 8, and w * h > 128. To reduce 
> complexity, SAD is only calculated on even rows. This is calculated for all 
> video bitdepths, but the values passed to the function are always 16bit (even 
> if the original video bitdepth is 8). The AVX2 implementation uses 
> min/max/sub.
> 
> Additionally this changes parameters dx and dy from int to intptr_t. This 
> allows dx & dy to be used as pointer offsets without needing to use movsxd.
> 
> Benchmarks ( AMD 7940HS )
> Before:
> BQTerrace_1920x1080_60_10_420_22_RA.vvc | 106.0 |
> Chimera_8bit_1080P_1000_frames.vvc | 204.3 |
> NovosobornayaSquare_1920x1080.bin | 197.3 |
> RitualDance_1920x1080_60_10_420_37_RA.266 | 174.0 |
> 
> After:
> BQTerrace_1920x1080_60_10_420_22_RA.vvc | 109.3 |
> Chimera_8bit_1080P_1000_frames.vvc | 216.0 |
> NovosobornayaSquare_1920x1080.bin | 204.0 |
> RitualDance_1920x1080_60_10_420_37_RA.266 | 181.7 |
> ---
>  libavcodec/vvc/dsp.c |   2 +-
>  libavcodec/vvc/dsp.h |   2 +-
>  libavcodec/x86/vvc/Makefile  |   3 +-
>  libavcodec/x86/vvc/vvc_sad.asm   | 130 +++
>  libavcodec/x86/vvc/vvcdsp_init.c |   6 ++
>  5 files changed, 140 insertions(+), 3 deletions(-)
>  create mode 100644 libavcodec/x86/vvc/vvc_sad.asm
> 
> diff --git a/libavcodec/x86/vvc/vvcdsp_init.c 
> b/libavcodec/x86/vvc/vvcdsp_init.c
> index 0e68971b2c..aa6c916760 100644
> --- a/libavcodec/x86/vvc/vvcdsp_init.c
> +++ b/libavcodec/x86/vvc/vvcdsp_init.c
> @@ -311,6 +311,9 @@ ALF_FUNCS(16, 12, avx2)
>  c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_##bd##_avx2;\
>  c->alf.classify   = ff_vvc_alf_classify_##bd##_avx2; \
>  } while (0)
> +
> +int ff_vvc_sad_avx2(const int16_t *src0, const int16_t *src1, intptr_t dx, 
> intptr_t dy, int block_w, int block_h);
> +#define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2

You are adding an AVX2 function to an ARCH_X86_64 #if block. I expect
this to lead to linking failures if AVX2 is disabled.

>  #endif
>  
>  void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
> @@ -327,6 +330,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const 
> int bd)
>  ALF_INIT(8);
>  AVG_INIT(8, avx2);
>  MC_LINKS_AVX2(8);
> +SAD_INIT();
>  }
>  break;
>  case 10:
> @@ -338,6 +342,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const 
> int bd)
>  AVG_INIT(10, avx2);
>  MC_LINKS_AVX2(10);
>  MC_LINKS_16BPC_AVX2(10);
> +SAD_INIT();
>  }
>  break;
>  case 12:
> @@ -349,6 +354,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const 
> int bd)
>  AVG_INIT(12, avx2);
>  MC_LINKS_AVX2(12);
>  MC_LINKS_16BPC_AVX2(12);
> +SAD_INIT();
>  }
>  break;
>  default:

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2] avcodec/dovi - correctly read el_bit_depth_minus8 and ext_mapping_idc

2024-05-21 Thread Andreas Rheinhardt
Cosmin Stejerean via ffmpeg-devel:
> From: Cosmin Stejerean 
> 
> These two fields are coded together into a single 16 bit integer with upper 8
> bits for ext_mapping_idc and lower 8 bits for el_bit_depth_minus8.
> 
> Furthermore ext_mapping_idc has two components, upper 3 bits and lower 5 bits.

How do you know about these fields? You seem to know something that
Niklas doesn't.

> 
> ---
>  libavcodec/dovi_rpudec.c | 7 ++-
>  libavcodec/dovi_rpuenc.c | 4 +++-
>  libavutil/dovi_meta.h| 2 ++
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/dovi_rpudec.c b/libavcodec/dovi_rpudec.c
> index 7c7eda9d09..af41ab5827 100644
> --- a/libavcodec/dovi_rpudec.c
> +++ b/libavcodec/dovi_rpudec.c
> @@ -411,13 +411,18 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t 
> *rpu, size_t rpu_size,
>  
>  if ((hdr->rpu_format & 0x700) == 0) {
>  int bl_bit_depth_minus8 = get_ue_golomb_31(gb);
> -int el_bit_depth_minus8 = get_ue_golomb_31(gb);
> +int el_bit_depth_minus8_and_ext_mapping_idc = 
> get_ue_golomb_long(gb);
> +int el_bit_depth_minus8 = 
> el_bit_depth_minus8_and_ext_mapping_idc & 0xFF; // lowest 8 bits
> +int ext_mapping_idc = (el_bit_depth_minus8_and_ext_mapping_idc & 
> 0xFF00) >> 8; // upper 8 bits
> +
>  int vdr_bit_depth_minus8 = get_ue_golomb_31(gb);
>  VALIDATE(bl_bit_depth_minus8, 0, 8);
>  VALIDATE(el_bit_depth_minus8, 0, 8);
>  VALIDATE(vdr_bit_depth_minus8, 0, 8);
>  hdr->bl_bit_depth = bl_bit_depth_minus8 + 8;
>  hdr->el_bit_depth = el_bit_depth_minus8 + 8;
> +hdr->ext_mapping_idc_0_4 = ext_mapping_idc & 0x1F; // lowest 5 
> bits of ext_mapping_idc
> +hdr->ext_mapping_idc_5_7 = (ext_mapping_idc & 0xE0) >> 5; // 
> upper 3 bits of ext_mapping_idc
>  hdr->vdr_bit_depth = vdr_bit_depth_minus8 + 8;
>  hdr->spatial_resampling_filter_flag = get_bits1(gb);
>  skip_bits(gb, 3); /* reserved_zero_3bits */
> diff --git a/libavcodec/dovi_rpuenc.c b/libavcodec/dovi_rpuenc.c
> index 3c3e0f84c0..91c0a85050 100644
> --- a/libavcodec/dovi_rpuenc.c
> +++ b/libavcodec/dovi_rpuenc.c
> @@ -444,6 +444,7 @@ int ff_dovi_rpu_generate(DOVIContext *s, const 
> AVDOVIMetadata *metadata,
>  int vdr_dm_metadata_changed, vdr_rpu_id, use_prev_vdr_rpu, profile,
>  buffer_size, rpu_size, pad, zero_run;
>  int num_ext_blocks_v1, num_ext_blocks_v2;
> +uint8_t ext_mapping_idc;
>  uint32_t crc;
>  uint8_t *dst;
>  if (!metadata) {
> @@ -551,7 +552,8 @@ int ff_dovi_rpu_generate(DOVIContext *s, const 
> AVDOVIMetadata *metadata,
>  put_bits(pb, 1, hdr->bl_video_full_range_flag);
>  if ((hdr->rpu_format & 0x700) == 0) {
>  set_ue_golomb(pb, hdr->bl_bit_depth - 8);
> -set_ue_golomb(pb, hdr->el_bit_depth - 8);
> +ext_mapping_idc = (hdr->ext_mapping_idc_5_7 << 5) | 
> hdr->ext_mapping_idc_0_4;
> +set_ue_golomb(pb, (ext_mapping_idc << 8) | hdr->el_bit_depth - 8);
>  set_ue_golomb(pb, hdr->vdr_bit_depth - 8);
>  put_bits(pb, 1, hdr->spatial_resampling_filter_flag);
>  put_bits(pb, 3, 0); /* reserved_zero_3bits */
> diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
> index e10332f8d7..d01bfe19fe 100644
> --- a/libavutil/dovi_meta.h
> +++ b/libavutil/dovi_meta.h
> @@ -87,6 +87,8 @@ typedef struct AVDOVIRpuDataHeader {
>  uint8_t bl_video_full_range_flag;
>  uint8_t bl_bit_depth; /* [8, 16] */
>  uint8_t el_bit_depth; /* [8, 16] */
> +uint8_t ext_mapping_idc_0_4; /* extended base layer inverse mapping 
> indicator */
> +uint8_t ext_mapping_idc_5_7; /* reserved */

This is an ABI break. All new additions need to be put at the end.
Furthermore this needs an entry in APIChanges and a lavu minor version
bump. And it should be in a patch of its own.

>  uint8_t vdr_bit_depth; /* [8, 16] */
>  uint8_t spatial_resampling_filter_flag;
>  uint8_t el_spatial_resampling_filter_flag;

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/2] lavc/qsvenc_av1: accept HDR metadata if have

2024-05-21 Thread Xiang, Haihao
From: Haihao Xiang 

The sdk av1 encoder can accept HDR metadata via mfxEncodeCtrl::ExtParam.

Signed-off-by: Haihao Xiang 
---
 libavcodec/qsvenc_av1.c | 73 +
 1 file changed, 73 insertions(+)

diff --git a/libavcodec/qsvenc_av1.c b/libavcodec/qsvenc_av1.c
index 33727bb07e..56002746b9 100644
--- a/libavcodec/qsvenc_av1.c
+++ b/libavcodec/qsvenc_av1.c
@@ -25,6 +25,8 @@
 #include <mfxvideo.h>
 
 #include "libavutil/common.h"
+#include "libavutil/mastering_display_metadata.h"
+#include "libavutil/mem.h"
 #include "libavutil/opt.h"
 
 #include "avcodec.h"
@@ -39,6 +41,75 @@ typedef struct QSVAV1EncContext {
 QSVEncContext qsv;
 } QSVAV1EncContext;
 
+static int qsv_av1_set_encode_ctrl(AVCodecContext *avctx,
+   const AVFrame *frame, mfxEncodeCtrl 
*enc_ctrl)
+{
+QSVAV1EncContext *q = avctx->priv_data;
+AVFrameSideData *sd;
+
+if (!frame || !QSV_RUNTIME_VERSION_ATLEAST(q->qsv.ver, 2, 11))
+return 0;
+
+sd = av_frame_get_side_data(frame, 
AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+if (sd) {
+AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata 
*)sd->data;
+if (mdm->has_primaries && mdm->has_luminance) {
+const int chroma_den   = 1 << 16;
+const int max_luma_den = 1 << 8;
+const int min_luma_den = 1 << 14;
+mfxExtMasteringDisplayColourVolume *mdcv = 
av_mallocz(sizeof(*mdcv));
+if (!mdcv)
+return AVERROR(ENOMEM);
+
+mdcv->Header.BufferId = 
MFX_EXTBUFF_MASTERING_DISPLAY_COLOUR_VOLUME;
+mdcv->Header.BufferSz = sizeof(*mdcv);
+
+for (int i = 0; i < 3; i++) {
+mdcv->DisplayPrimariesX[i] =
+av_rescale(mdm->display_primaries[i][0].num, chroma_den,
+   mdm->display_primaries[i][0].den);
+mdcv->DisplayPrimariesY[i] =
+av_rescale(mdm->display_primaries[i][1].num, chroma_den,
+   mdm->display_primaries[i][1].den);
+}
+
+mdcv->WhitePointX =
+av_rescale(mdm->white_point[0].num, chroma_den,
+   mdm->white_point[0].den);
+mdcv->WhitePointY =
+av_rescale(mdm->white_point[1].num, chroma_den,
+   mdm->white_point[1].den);
+
+mdcv->MaxDisplayMasteringLuminance =
+av_rescale(mdm->max_luminance.num, max_luma_den,
+   mdm->max_luminance.den);
+mdcv->MinDisplayMasteringLuminance =
+av_rescale(mdm->min_luminance.num, min_luma_den,
+   mdm->min_luminance.den);
+
+enc_ctrl->ExtParam[enc_ctrl->NumExtParam++] = (mfxExtBuffer *)mdcv;
+}
+}
+
+sd = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+if (sd) {
+AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data;
+mfxExtContentLightLevelInfo *clli = av_mallocz(sizeof(*clli));
+if (!clli)
+return AVERROR(ENOMEM);
+
+clli->Header.BufferId = MFX_EXTBUFF_CONTENT_LIGHT_LEVEL_INFO;
+clli->Header.BufferSz = sizeof(*clli);
+
+clli->MaxContentLightLevel  = clm->MaxCLL;
+clli->MaxPicAverageLightLevel   = clm->MaxFALL;
+
+enc_ctrl->ExtParam[enc_ctrl->NumExtParam++] = (mfxExtBuffer *)clli;
+}
+
+return 0;
+}
+
 static av_cold int qsv_enc_init(AVCodecContext *avctx)
 {
 QSVAV1EncContext *q = avctx->priv_data;
@@ -61,6 +132,8 @@ static av_cold int qsv_enc_init(AVCodecContext *avctx)
return ret;
 }
 
+q->qsv.set_encode_ctrl_cb = qsv_av1_set_encode_ctrl;
+
 return ff_qsv_enc_init(avctx, &q->qsv);
 }
 
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 1/2] lavc/qsvdec: update HDR side data on output AVFrame for AV1 decoding

2024-05-21 Thread Xiang, Haihao
From: Haihao Xiang 

The SDK may provide HDR metadata for HDR streams via mfxExtBuffer
attached to the output mfxFrameSurface1.

Signed-off-by: Haihao Xiang 
---
 libavcodec/qsvdec.c | 48 -
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index df0d49bc10..7741baff06 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -538,7 +538,8 @@ static int alloc_frame(AVCodecContext *avctx, QSVContext 
*q, QSVFrame *frame)
 #endif
 
 #if QSV_VERSION_ATLEAST(1, 35)
-if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 1, 35) && avctx->codec_id == 
AV_CODEC_ID_HEVC) {
+if ((QSV_RUNTIME_VERSION_ATLEAST(q->ver, 1, 35) && avctx->codec_id == 
AV_CODEC_ID_HEVC) ||
+(QSV_RUNTIME_VERSION_ATLEAST(q->ver, 2, 9) && avctx->codec_id == 
AV_CODEC_ID_AV1)) {
 frame->mdcv.Header.BufferId = 
MFX_EXTBUFF_MASTERING_DISPLAY_COLOUR_VOLUME;
 frame->mdcv.Header.BufferSz = sizeof(frame->mdcv);
 // The data in mdcv is valid when this flag is 1
@@ -742,6 +743,45 @@ static int qsv_export_hdr_side_data(AVCodecContext *avctx, 
mfxExtMasteringDispla
 return 0;
 }
 
+static int qsv_export_hdr_side_data_av1(AVCodecContext *avctx, 
mfxExtMasteringDisplayColourVolume *mdcv,
+mfxExtContentLightLevelInfo *clli, 
AVFrame *frame)
+{
+if (mdcv->InsertPayloadToggle) {
+AVMasteringDisplayMetadata *mastering = 
av_mastering_display_metadata_create_side_data(frame);
+const int chroma_den   = 1 << 16;
+const int max_luma_den = 1 << 8;
+const int min_luma_den = 1 << 14;
+
+if (!mastering)
+return AVERROR(ENOMEM);
+
+for (int i = 0; i < 3; i++) {
+mastering->display_primaries[i][0] = 
av_make_q(mdcv->DisplayPrimariesX[i], chroma_den);
+mastering->display_primaries[i][1] = 
av_make_q(mdcv->DisplayPrimariesY[i], chroma_den);
+}
+
+mastering->white_point[0] = av_make_q(mdcv->WhitePointX, chroma_den);
+mastering->white_point[1] = av_make_q(mdcv->WhitePointY, chroma_den);
+
+mastering->max_luminance = 
av_make_q(mdcv->MaxDisplayMasteringLuminance, max_luma_den);
+mastering->min_luminance = 
av_make_q(mdcv->MinDisplayMasteringLuminance, min_luma_den);
+
+mastering->has_luminance = 1;
+mastering->has_primaries = 1;
+}
+
+if (clli->InsertPayloadToggle) {
+AVContentLightMetadata *light = 
av_content_light_metadata_create_side_data(frame);
+if (!light)
+return AVERROR(ENOMEM);
+
+light->MaxCLL  = clli->MaxContentLightLevel;
+light->MaxFALL = clli->MaxPicAverageLightLevel;
+}
+
+return 0;
+}
+
 #endif
 
 static int qsv_decode(AVCodecContext *avctx, QSVContext *q,
@@ -874,6 +914,12 @@ static int qsv_decode(AVCodecContext *avctx, QSVContext *q,
 if (ret < 0)
 return ret;
 }
+
+if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 2, 9) && avctx->codec_id == 
AV_CODEC_ID_AV1) {
+ret = qsv_export_hdr_side_data_av1(avctx, &aframe.frame->mdcv, 
&aframe.frame->clli, frame);
+if (ret < 0)
+return ret;
+}
 #endif
 
 frame->repeat_pict =
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v5 2/2][GSoC 2024] tests/checkasm: Add check_vvc_sad to vvc_mc.c

2024-05-21 Thread James Almer

On 5/21/2024 9:00 PM, Stone Chen wrote:

Adds checkasm for DMVR SAD AVX2 implementation.

Benchmarks ( AMD 7940HS )
vvc_sad_8x8_c: 50.3
vvc_sad_8x8_avx2: 0.3
vvc_sad_16x16_c: 250.3
vvc_sad_16x16_avx2: 10.3
vvc_sad_32x32_c: 1020.3
vvc_sad_32x32_avx2: 60.3
vvc_sad_64x64_c: 3850.3
vvc_sad_64x64_avx2: 220.3
vvc_sad_128x128_c: 14100.3
vvc_sad_128x128_avx2: 840.3
---
  tests/checkasm/vvc_mc.c | 38 ++
  1 file changed, 38 insertions(+)

diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
index 97f57cb401..f2d7a6d561 100644
--- a/tests/checkasm/vvc_mc.c
+++ b/tests/checkasm/vvc_mc.c
@@ -322,8 +322,46 @@ static void check_avg(void)
  report("avg");
  }
  
+static void check_vvc_sad(void)

+{
+const int bit_depth = 10;
+VVCDSPContext c;
+LOCAL_ALIGNED_32(uint16_t, src0, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]);
+LOCAL_ALIGNED_32(uint16_t, src1, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]);
+declare_func(int, const int16_t *src0, const int16_t *src1, intptr_t dx, 
intptr_t dy, int block_w, int block_h);


Not related to this patch, but dsp.h should mention alignment 
requirements for all the parameters in dsp functions.



+
+ff_vvc_dsp_init(&c, bit_depth);
+memset(src0, 0, MAX_CTU_SIZE * MAX_CTU_SIZE * 4);
+memset(src1, 0, MAX_CTU_SIZE * MAX_CTU_SIZE * 4);


MAX_CTU_SIZE * MAX_CTU_SIZE * 4 * sizeof(uint16_t)


+
+randomize_pixels(src0, src1, MAX_CTU_SIZE * MAX_CTU_SIZE * 2);


Seeing randomize_buffers() is written for uint8_t buffers, it should be 
updated for this, like so:


#define randomize_buffers(buf0, buf1, size, mask)   \
do {\
int k;  \
for (k = 0; k < size; k += 4 / sizeof(*buf0)) { \
uint32_t r = rnd() & mask;  \
AV_WN32A(buf0 + k, r);  \
AV_WN32A(buf1 + k, r);  \
}   \
} while (0)

And the argument changed to "MAX_CTU_SIZE * MAX_CTU_SIZE * 4". Otherwise 
the loop will write 4 bytes and leave the next 4 untouched.



+ for (int h = 8; h <= MAX_CTU_SIZE; h *= 2) {


There's an extra whitespace of indentation here.


+for (int w = 8; w <= MAX_CTU_SIZE; w *= 2) {
+for(int offy = 0; offy <= 4; offy++) {
+for(int offx = 0; offx <= 4; offx++) {
+if(check_func(c.inter.sad, "vvc_sad_%dx%d", w, h)) {


"sad_%dx%d"


+int result0;
+int result1;
+
+result0 =  call_ref(src0 + PIXEL_STRIDE * 2 + 2, src1 
+ PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
+result1 =  call_new(src0 + PIXEL_STRIDE * 2 + 2, src1 
+ PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
+
+if (result1 != result0)
+fail();
+if(w == h && offx == 0 && offy == 0)
+bench_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + 
PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
+}
+}
+}
+}
+ }
+
+report("check_vvc_sad");


report("sad");


+}
+
  void checkasm_check_vvc_mc(void)
  {
+check_vvc_sad();
  check_put_vvc_luma();
  check_put_vvc_luma_uni();
  check_put_vvc_chroma();

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avformat/mov: avoid seeking back to 0 on HEVC open GOP files

2024-05-21 Thread Philip Langdale via ffmpeg-devel
On Tue, 14 May 2024 19:07:59 -0700
Philip Langdale via ffmpeg-devel  wrote:

> On Wed, 15 May 2024 01:36:43 +0530
> llyyr.pub...@gmail.com wrote:
> 
> > From: llyyr 
> > 
> > ab77b878f1 attempted to fix the issue of broken packets being sent
> > to the decoder by implementing logic that kept attempting to
> > PTS-step backwards until it reached a valid point, however applying
> > this heuristic meant that in files that had no valid points (such
> > as HEVC videos shot on iPhones), we'd seek back to sample 0 on
> > every seek attempt. This meant that files that were previously
> > seekable, albeit with some skipped frames, were not seekable at all
> > now.
> > 
> > Relax this heuristic a bit by giving up on seeking to a valid point
> > if we've tried a different sample and we still don't have a valid
> > point to seek to. This may cause some frames to be skipped on seeking but
> > it's better than not being able to seek at all in such files.
> > 
> > Fixes: ab77b878f1 ("avformat/mov: fix seeking with HEVC open GOP
> > files") Fixes: #10585
> 
> LGTM.
> 
> I know it's been a _long time_ since you first sent this; I'll push
> next week if there aren't any other comments.
> 

Pushed. Thanks!

--phil
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v9 11/13] avutil/hwcontext_d3d12va: add Flags for resource creation

2024-05-21 Thread Wu, Tong1
>-Original Message-
>From: ffmpeg-devel  On Behalf Of
>Andrew Sayers
>Sent: Wednesday, May 22, 2024 12:32 AM
>To: FFmpeg development discussions and patches de...@ffmpeg.org>
>Subject: Re: [FFmpeg-devel] [PATCH v9 11/13] avutil/hwcontext_d3d12va: add
>Flags for resource creation
>
>(Only reviewing documentation, not code)
>
>On Mon, May 20, 2024 at 10:52:20PM +0800, tong1.wu-at-
>intel@ffmpeg.org wrote:
>> From: Tong Wu 
>>
>> Flags field is added to support different resource creation.
>>
>> Signed-off-by: Tong Wu 
>> ---
>>  doc/APIchanges| 3 +++
>>  libavutil/hwcontext_d3d12va.c | 2 +-
>>  libavutil/hwcontext_d3d12va.h | 8 
>>  libavutil/version.h   | 2 +-
>>  4 files changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/doc/APIchanges b/doc/APIchanges
>> index 269fd36559..808ba02f2d 100644
>> --- a/doc/APIchanges
>> +++ b/doc/APIchanges
>> @@ -2,6 +2,9 @@ The last version increases of all libraries were on 2024-03-
>07
>>
>>  API changes, most recent first:
>>
>> +2024-01-xx - xx - lavu 59.20.100 - hwcontext_d3d12va.h
>> + Add AVD3D12VAFramesContext.flags
>> +
>>  2024-05-xx - xx - lavu 59.19.100 - hwcontext_qsv.h
>>Add AVQSVFramesContext.info
>>
>> diff --git a/libavutil/hwcontext_d3d12va.c b/libavutil/hwcontext_d3d12va.c
>> index cfc016315d..6507cf69c1 100644
>> --- a/libavutil/hwcontext_d3d12va.c
>> +++ b/libavutil/hwcontext_d3d12va.c
>> @@ -247,7 +247,7 @@ static AVBufferRef *d3d12va_pool_alloc(void
>*opaque, size_t size)
>>  .Format   = hwctx->format,
>>  .SampleDesc   = {.Count = 1, .Quality = 0 },
>>  .Layout   = D3D12_TEXTURE_LAYOUT_UNKNOWN,
>> -.Flags= D3D12_RESOURCE_FLAG_NONE,
>> +.Flags= hwctx->flags,
>>  };
>>
>>  frame = av_mallocz(sizeof(AVD3D12VAFrame));
>> diff --git a/libavutil/hwcontext_d3d12va.h b/libavutil/hwcontext_d3d12va.h
>> index ff06e6f2ef..608dbac97f 100644
>> --- a/libavutil/hwcontext_d3d12va.h
>> +++ b/libavutil/hwcontext_d3d12va.h
>> @@ -129,6 +129,14 @@ typedef struct AVD3D12VAFramesContext {
>>   * If unset, will be automatically set.
>>   */
>>  DXGI_FORMAT format;
>> +
>> +/**
>> + * This field is used to specify options for working with resources.
>> + * If unset, this will be D3D12_RESOURCE_FLAG_NONE.
>> + *
>> + * @see: https://learn.microsoft.com/en-
>us/windows/win32/api/d3d12/ne-d3d12-d3d12_resource_flags.
>> + */
>> +D3D12_RESOURCE_FLAGS flags;
>
>Some nitpicks:
>
>* "This field is used to specify" is redundant, you can save the reader
>  a few seconds by starting the sentence with just "Options..."
>* "@see" starts a paragraph, so the rendered documentation will look better
>  without the ":"
>* the full stop after the URL makes it harder to copy/paste the text -
>  remove the full stop or use a [markdown link](...)

Sounds good. I've updated it with a new version.

-Tong


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v10 13/13] Changelog: add D3D12VA HEVC encoder changelog

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Signed-off-by: Tong Wu 
---
 Changelog | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Changelog b/Changelog
index 12770e4296..c5d57b3813 100644
--- a/Changelog
+++ b/Changelog
@@ -11,7 +11,7 @@ version :
 - vf_scale2ref deprecated
 - qsv_params option added for QSV encoders
 - VVC decoder compatible with DVB test content
-
+- D3D12VA HEVC encoder
 
 version 7.0:
 - DXV DXT1 encoder
-- 
2.41.0.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v10 12/13] avcodec: add D3D12VA hardware HEVC encoder

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

This implementation is based on D3D12 Video Encoding Spec:
https://microsoft.github.io/DirectX-Specs/d3d/D3D12VideoEncoding.html

Sample command line for transcoding:
ffmpeg.exe -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4
-c:v hevc_d3d12va output.mp4

Signed-off-by: Tong Wu 
---
 configure|6 +
 libavcodec/Makefile  |5 +-
 libavcodec/allcodecs.c   |1 +
 libavcodec/d3d12va_encode.c  | 1558 ++
 libavcodec/d3d12va_encode.h  |  334 +++
 libavcodec/d3d12va_encode_hevc.c | 1007 +++
 6 files changed, 2910 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/d3d12va_encode.c
 create mode 100644 libavcodec/d3d12va_encode.h
 create mode 100644 libavcodec/d3d12va_encode_hevc.c

diff --git a/configure b/configure
index b16722d83d..127d68e60c 100755
--- a/configure
+++ b/configure
@@ -2551,6 +2551,7 @@ CONFIG_EXTRA="
 cbs_mpeg2
 cbs_vp8
 cbs_vp9
+d3d12va_encode
 deflate_wrapper
 dirac_parse
 dnn
@@ -3287,6 +3288,7 @@ wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel"
 wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
 
 # hardware-accelerated codecs
+d3d12va_encode_deps="d3d12va ID3D12VideoEncoder d3d12_encoder_feature"
 mediafoundation_deps="mftransform_h MFCreateAlignedMemoryBuffer"
 omx_deps="libdl pthreads"
 omx_rpi_select="omx"
@@ -3354,6 +3356,7 @@ h264_v4l2m2m_encoder_deps="v4l2_m2m h264_v4l2_m2m"
 hevc_amf_encoder_deps="amf"
 hevc_cuvid_decoder_deps="cuvid"
 hevc_cuvid_decoder_select="hevc_mp4toannexb_bsf"
+hevc_d3d12va_encoder_select="cbs_h265 d3d12va_encode"
 hevc_mediacodec_decoder_deps="mediacodec"
 hevc_mediacodec_decoder_select="hevc_mp4toannexb_bsf hevc_parser"
 hevc_mediacodec_encoder_deps="mediacodec"
@@ -6725,6 +6728,9 @@ check_type "windows.h d3d11.h" "ID3D11VideoDecoder"
 check_type "windows.h d3d11.h" "ID3D11VideoContext"
 check_type "windows.h d3d12.h" "ID3D12Device"
 check_type "windows.h d3d12video.h" "ID3D12VideoDecoder"
+check_type "windows.h d3d12video.h" "ID3D12VideoEncoder"
+test_code cc "windows.h d3d12video.h" "D3D12_FEATURE_VIDEO feature = 
D3D12_FEATURE_VIDEO_ENCODER_CODEC" && \
+test_code cc "windows.h d3d12video.h" 
"D3D12_FEATURE_DATA_VIDEO_ENCODER_RESOURCE_REQUIREMENTS req" && enable 
d3d12_encoder_feature
 check_type "windows.h" "DPI_AWARENESS_CONTEXT" -D_WIN32_WINNT=0x0A00
 check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602
 check_func_headers mfapi.h MFCreateAlignedMemoryBuffer -lmfplat
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 998f6b7e12..6c4500ce6d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -86,6 +86,7 @@ OBJS-$(CONFIG_CBS_JPEG)+= cbs_jpeg.o
 OBJS-$(CONFIG_CBS_MPEG2)   += cbs_mpeg2.o
 OBJS-$(CONFIG_CBS_VP8) += cbs_vp8.o vp8data.o
 OBJS-$(CONFIG_CBS_VP9) += cbs_vp9.o
+OBJS-$(CONFIG_D3D12VA_ENCODE)  += d3d12va_encode.o hw_base_encode.o
 OBJS-$(CONFIG_DEFLATE_WRAPPER) += zlib_wrapper.o
 OBJS-$(CONFIG_DOVI_RPUDEC) += dovi_rpu.o dovi_rpudec.o
 OBJS-$(CONFIG_DOVI_RPUENC) += dovi_rpu.o dovi_rpuenc.o
@@ -436,6 +437,8 @@ OBJS-$(CONFIG_HEVC_DECODER)+= hevcdec.o 
hevc_mvs.o \
   h274.o aom_film_grain.o
 OBJS-$(CONFIG_HEVC_AMF_ENCODER)+= amfenc_hevc.o
 OBJS-$(CONFIG_HEVC_CUVID_DECODER)  += cuviddec.o
+OBJS-$(CONFIG_HEVC_D3D12VA_ENCODER)+= d3d12va_encode_hevc.o 
h265_profile_level.o \
+  h2645data.o
 OBJS-$(CONFIG_HEVC_MEDIACODEC_DECODER) += mediacodecdec.o
 OBJS-$(CONFIG_HEVC_MEDIACODEC_ENCODER) += mediacodecenc.o
 OBJS-$(CONFIG_HEVC_MF_ENCODER) += mfenc.o mf_utils.o
@@ -1265,7 +1268,7 @@ SKIPHEADERS+= %_tablegen.h
  \
 
 SKIPHEADERS-$(CONFIG_AMF)  += amfenc.h
 SKIPHEADERS-$(CONFIG_D3D11VA)  += d3d11va.h dxva2_internal.h
-SKIPHEADERS-$(CONFIG_D3D12VA)  += d3d12va_decode.h
+SKIPHEADERS-$(CONFIG_D3D12VA)  += d3d12va_decode.h d3d12va_encode.h
 SKIPHEADERS-$(CONFIG_DXVA2)+= dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_JNI)  += ffjni.h
 SKIPHEADERS-$(CONFIG_LCMS2)+= fflcms2.h
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index b102a8069e..463ffbbd08 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -848,6 +848,7 @@ extern const FFCodec ff_h264_vaapi_encoder;
 extern const FFCodec ff_h264_videotoolbox_encoder;
 extern const FFCodec ff_hevc_amf_encoder;
 extern const FFCodec ff_hevc_cuvid_decoder;
+extern const FFCodec ff_hevc_d3d12va_encoder;
 extern const FFCodec ff_hevc_mediacodec_decoder;
 extern const FFCodec ff_hevc_mediacodec_encoder;
 extern const FFCodec ff_hevc_mf_encoder;
diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
new file mode 100644
index 00..a20e6cc961
--- /

[FFmpeg-devel] [PATCH v10 11/13] avutil/hwcontext_d3d12va: add Flags for resource creation

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

A flags field is added so that resources can be created with different
D3D12 resource flags.

Signed-off-by: Tong Wu 
---
 doc/APIchanges| 3 +++
 libavutil/hwcontext_d3d12va.c | 2 +-
 libavutil/hwcontext_d3d12va.h | 8 
 libavutil/version.h   | 2 +-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 269fd36559..808ba02f2d 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -2,6 +2,9 @@ The last version increases of all libraries were on 2024-03-07
 
 API changes, most recent first:
 
+2024-05-xx - xx - lavu 59.20.100 - hwcontext_d3d12va.h
+ Add AVD3D12VAFramesContext.flags
+
 2024-05-xx - xx - lavu 59.19.100 - hwcontext_qsv.h
   Add AVQSVFramesContext.info
 
diff --git a/libavutil/hwcontext_d3d12va.c b/libavutil/hwcontext_d3d12va.c
index cfc016315d..6507cf69c1 100644
--- a/libavutil/hwcontext_d3d12va.c
+++ b/libavutil/hwcontext_d3d12va.c
@@ -247,7 +247,7 @@ static AVBufferRef *d3d12va_pool_alloc(void *opaque, size_t 
size)
 .Format   = hwctx->format,
 .SampleDesc   = {.Count = 1, .Quality = 0 },
 .Layout   = D3D12_TEXTURE_LAYOUT_UNKNOWN,
-.Flags= D3D12_RESOURCE_FLAG_NONE,
+.Flags= hwctx->flags,
 };
 
 frame = av_mallocz(sizeof(AVD3D12VAFrame));
diff --git a/libavutil/hwcontext_d3d12va.h b/libavutil/hwcontext_d3d12va.h
index ff06e6f2ef..212a6a6146 100644
--- a/libavutil/hwcontext_d3d12va.h
+++ b/libavutil/hwcontext_d3d12va.h
@@ -129,6 +129,14 @@ typedef struct AVD3D12VAFramesContext {
  * If unset, will be automatically set.
  */
 DXGI_FORMAT format;
+
+/**
+ * Options for working with resources.
+ * If unset, this will be D3D12_RESOURCE_FLAG_NONE.
+ *
+ * @see 
https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_resource_flags
+ */
+D3D12_RESOURCE_FLAGS flags;
 } AVD3D12VAFramesContext;
 
 #endif /* AVUTIL_HWCONTEXT_D3D12VA_H */
diff --git a/libavutil/version.h b/libavutil/version.h
index 3221c4c592..9c7146c228 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  59
-#define LIBAVUTIL_VERSION_MINOR  19
+#define LIBAVUTIL_VERSION_MINOR  20
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
-- 
2.41.0.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v10 10/13] avcodec/vaapi_encode: extract a free function to base layer

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Signed-off-by: Tong Wu 
---
 libavcodec/hw_base_encode.c | 11 +++
 libavcodec/hw_base_encode.h |  2 ++
 libavcodec/vaapi_encode.c   |  6 +-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
index 42c40cb48f..f743d119cd 100644
--- a/libavcodec/hw_base_encode.c
+++ b/libavcodec/hw_base_encode.c
@@ -748,6 +748,17 @@ fail:
 return err;
 }
 
+int ff_hw_base_encode_free(AVCodecContext *avctx, HWBaseEncodePicture *pic)
+{
+av_frame_free(&pic->input_image);
+av_frame_free(&pic->recon_image);
+
+av_buffer_unref(&pic->opaque_ref);
+av_freep(&pic->priv_data);
+
+return 0;
+}
+
 int ff_hw_base_encode_init(AVCodecContext *avctx)
 {
 HWBaseEncodeContext *ctx = avctx->priv_data;
diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
index 7d40da039c..76a39e2d97 100644
--- a/libavcodec/hw_base_encode.h
+++ b/libavcodec/hw_base_encode.h
@@ -223,6 +223,8 @@ int ff_hw_base_init_gop_structure(AVCodecContext *avctx, 
uint32_t ref_l0, uint32
 
 int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void *hwconfig, 
enum AVPixelFormat *fmt);
 
+int ff_hw_base_encode_free(AVCodecContext *avctx, HWBaseEncodePicture *pic);
+
 int ff_hw_base_encode_init(AVCodecContext *avctx);
 
 int ff_hw_base_encode_close(AVCodecContext *avctx);
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 98b8c82da3..e89d6e01af 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -878,17 +878,13 @@ static int vaapi_encode_free(AVCodecContext *avctx,
 av_freep(&pic->slices[i].codec_slice_params);
 }
 
-av_frame_free(&base_pic->input_image);
-av_frame_free(&base_pic->recon_image);
-
-av_buffer_unref(&base_pic->opaque_ref);
+ff_hw_base_encode_free(avctx, base_pic);
 
 av_freep(&pic->param_buffers);
 av_freep(&pic->slices);
 // Output buffer should already be destroyed.
 av_assert0(pic->output_buffer == VA_INVALID_ID);
 
-av_freep(&base_pic->priv_data);
 av_freep(&pic->codec_picture_params);
 av_freep(&pic->roi);
 
-- 
2.41.0.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v10 09/13] avcodec/vaapi_encode: extract a get_recon_format function to base layer

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Surface size and block size parameters are also moved to base layer.

Signed-off-by: Tong Wu 
---
 libavcodec/hw_base_encode.c | 58 +++
 libavcodec/hw_base_encode.h | 12 +
 libavcodec/vaapi_encode.c   | 81 -
 libavcodec/vaapi_encode.h   | 10 
 libavcodec/vaapi_encode_av1.c   | 10 ++--
 libavcodec/vaapi_encode_h264.c  | 11 +++--
 libavcodec/vaapi_encode_h265.c  | 25 +-
 libavcodec/vaapi_encode_mjpeg.c |  5 +-
 libavcodec/vaapi_encode_vp9.c   |  6 +--
 9 files changed, 118 insertions(+), 100 deletions(-)

diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
index df820c2f83..42c40cb48f 100644
--- a/libavcodec/hw_base_encode.c
+++ b/libavcodec/hw_base_encode.c
@@ -690,6 +690,64 @@ int ff_hw_base_init_gop_structure(AVCodecContext *avctx, 
uint32_t ref_l0, uint32
 return 0;
 }
 
+int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void *hwconfig, 
enum AVPixelFormat *fmt)
+{
+HWBaseEncodeContext *ctx = avctx->priv_data;
+AVHWFramesConstraints *constraints = NULL;
+enum AVPixelFormat recon_format;
+int err, i;
+
+constraints = av_hwdevice_get_hwframe_constraints(ctx->device_ref,
+  hwconfig);
+if (!constraints) {
+err = AVERROR(ENOMEM);
+goto fail;
+}
+
+// Probably we can use the input surface format as the surface format
+// of the reconstructed frames.  If not, we just pick the first (only?)
+// format in the valid list and hope that it all works.
+recon_format = AV_PIX_FMT_NONE;
+if (constraints->valid_sw_formats) {
+for (i = 0; constraints->valid_sw_formats[i] != AV_PIX_FMT_NONE; i++) {
+if (ctx->input_frames->sw_format ==
+constraints->valid_sw_formats[i]) {
+recon_format = ctx->input_frames->sw_format;
+break;
+}
+}
+if (recon_format == AV_PIX_FMT_NONE) {
+// No match.  Just use the first in the supported list and
+// hope for the best.
+recon_format = constraints->valid_sw_formats[0];
+}
+} else {
+// No idea what to use; copy input format.
+recon_format = ctx->input_frames->sw_format;
+}
+av_log(avctx, AV_LOG_DEBUG, "Using %s as format of "
+   "reconstructed frames.\n", av_get_pix_fmt_name(recon_format));
+
+if (ctx->surface_width  < constraints->min_width  ||
+ctx->surface_height < constraints->min_height ||
+ctx->surface_width  > constraints->max_width ||
+ctx->surface_height > constraints->max_height) {
+av_log(avctx, AV_LOG_ERROR, "Hardware does not support encoding at "
+   "size %dx%d (constraints: width %d-%d height %d-%d).\n",
+   ctx->surface_width, ctx->surface_height,
+   constraints->min_width,  constraints->max_width,
+   constraints->min_height, constraints->max_height);
+err = AVERROR(EINVAL);
+goto fail;
+}
+
+*fmt = recon_format;
+err = 0;
+fail:
+av_hwframe_constraints_free(&constraints);
+return err;
+}
+
 int ff_hw_base_encode_init(AVCodecContext *avctx)
 {
 HWBaseEncodeContext *ctx = avctx->priv_data;
diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
index d363819329..7d40da039c 100644
--- a/libavcodec/hw_base_encode.h
+++ b/libavcodec/hw_base_encode.h
@@ -127,6 +127,16 @@ typedef struct HWBaseEncodeContext {
 // Desired B frame reference depth.
 int desired_b_depth;
 
+// The required size of surfaces.  This is probably the input
+// size (AVCodecContext.width|height) aligned up to whatever
+// block size is required by the codec.
+int surface_width;
+int surface_height;
+
+// The block size for slice calculations.
+int slice_block_width;
+int slice_block_height;
+
 // The hardware device context.
 AVBufferRef*device_ref;
 AVHWDeviceContext *device;
@@ -211,6 +221,8 @@ int ff_hw_base_encode_receive_packet(AVCodecContext *avctx, 
AVPacket *pkt);
 int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t ref_l0, 
uint32_t ref_l1,
   int flags, int prediction_pre_only);
 
+int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void *hwconfig, 
enum AVPixelFormat *fmt);
+
 int ff_hw_base_encode_init(AVCodecContext *avctx);
 
 int ff_hw_base_encode_close(AVCodecContext *avctx);
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 6ab1b633ed..98b8c82da3 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -1777,6 +1777,7 @@ static av_cold int 
vaapi_encode_init_tile_slice_structure(AVCodecContext *avctx,
 
 static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
 {
+HWBaseEncodeContext *base_ctx = avctx->pr

[FFmpeg-devel] [PATCH v10 06/13] avcodec/vaapi_encode: extract the init and close function to base layer

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Related parameters, such as the device context and the frame contexts,
are also moved to the base layer.

Signed-off-by: Tong Wu 
---
 libavcodec/hw_base_encode.c | 49 ++
 libavcodec/hw_base_encode.h | 17 +++
 libavcodec/vaapi_encode.c   | 90 +++--
 libavcodec/vaapi_encode.h   | 10 
 libavcodec/vaapi_encode_av1.c   |  2 +-
 libavcodec/vaapi_encode_h264.c  |  2 +-
 libavcodec/vaapi_encode_h265.c  |  2 +-
 libavcodec/vaapi_encode_mjpeg.c |  6 ++-
 8 files changed, 102 insertions(+), 76 deletions(-)

diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
index ec7178d2dc..f2b6ef4a3a 100644
--- a/libavcodec/hw_base_encode.c
+++ b/libavcodec/hw_base_encode.c
@@ -595,3 +595,52 @@ end:
 
 return 0;
 }
+
+int ff_hw_base_encode_init(AVCodecContext *avctx)
+{
+HWBaseEncodeContext *ctx = avctx->priv_data;
+
+ctx->frame = av_frame_alloc();
+if (!ctx->frame)
+return AVERROR(ENOMEM);
+
+if (!avctx->hw_frames_ctx) {
+av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
+   "required to associate the encoding device.\n");
+return AVERROR(EINVAL);
+}
+
+ctx->input_frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
+if (!ctx->input_frames_ref)
+return AVERROR(ENOMEM);
+
+ctx->input_frames = (AVHWFramesContext *)ctx->input_frames_ref->data;
+
+ctx->device_ref = av_buffer_ref(ctx->input_frames->device_ref);
+if (!ctx->device_ref)
+return AVERROR(ENOMEM);
+
+ctx->device = (AVHWDeviceContext *)ctx->device_ref->data;
+
+ctx->tail_pkt = av_packet_alloc();
+if (!ctx->tail_pkt)
+return AVERROR(ENOMEM);
+
+return 0;
+}
+
+int ff_hw_base_encode_close(AVCodecContext *avctx)
+{
+HWBaseEncodeContext *ctx = avctx->priv_data;
+
+av_fifo_freep2(&ctx->encode_fifo);
+
+av_frame_free(&ctx->frame);
+av_packet_free(&ctx->tail_pkt);
+
+av_buffer_unref(&ctx->device_ref);
+av_buffer_unref(&ctx->input_frames_ref);
+av_buffer_unref(&ctx->recon_frames_ref);
+
+return 0;
+}
diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
index 2667ae61cd..7e20f57b9c 100644
--- a/libavcodec/hw_base_encode.h
+++ b/libavcodec/hw_base_encode.h
@@ -19,6 +19,7 @@
 #ifndef AVCODEC_HW_BASE_ENCODE_H
 #define AVCODEC_HW_BASE_ENCODE_H
 
+#include "libavutil/hwcontext.h"
 #include "libavutil/fifo.h"
 
 #define MAX_DPB_SIZE 16
@@ -118,6 +119,18 @@ typedef struct HWBaseEncodeContext {
 // Hardware-specific hooks.
 const struct HWEncodePictureOperation *op;
 
+// The hardware device context.
+AVBufferRef*device_ref;
+AVHWDeviceContext *device;
+
+// The hardware frame context containing the input frames.
+AVBufferRef*input_frames_ref;
+AVHWFramesContext *input_frames;
+
+// The hardware frame context containing the reconstructed frames.
+AVBufferRef*recon_frames_ref;
+AVHWFramesContext *recon_frames;
+
 // Current encoding window, in display (input) order.
 HWBaseEncodePicture *pic_start, *pic_end;
 // The next picture to use as the previous reference picture in
@@ -184,6 +197,10 @@ typedef struct HWBaseEncodeContext {
 
 int ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
 
+int ff_hw_base_encode_init(AVCodecContext *avctx);
+
+int ff_hw_base_encode_close(AVCodecContext *avctx);
+
 #define HW_BASE_ENCODE_COMMON_OPTIONS \
 { "async_depth", "Maximum processing parallelism. " \
   "Increase this to improve single channel performance.", \
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index c4bb93c520..c3ab0fc192 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -314,7 +314,7 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
 
 av_log(avctx, AV_LOG_DEBUG, "Input surface is %#x.\n", pic->input_surface);
 
-err = av_hwframe_get_buffer(ctx->recon_frames_ref, base_pic->recon_image, 
0);
+err = av_hwframe_get_buffer(base_ctx->recon_frames_ref, 
base_pic->recon_image, 0);
 if (err < 0) {
 err = AVERROR(ENOMEM);
 goto fail;
@@ -996,9 +996,10 @@ static const VAEntrypoint 
vaapi_encode_entrypoints_low_power[] = {
 
 static av_cold int vaapi_encode_profile_entrypoint(AVCodecContext *avctx)
 {
-VAAPIEncodeContext  *ctx = avctx->priv_data;
-VAProfile*va_profiles= NULL;
-VAEntrypoint *va_entrypoints = NULL;
+HWBaseEncodeContext *base_ctx = avctx->priv_data;
+VAAPIEncodeContext   *ctx = avctx->priv_data;
+VAProfile *va_profiles= NULL;
+VAEntrypoint  *va_entrypoints = NULL;
 VAStatus vas;
 const VAEntrypoint *usable_entrypoints;
 const VAAPIEncodeProfile *profile;
@@ -1021,10 +1022,10 @@ static av_cold int 
vaapi_encode_profile_entrypoint(AVCodecContext *avctx)
 usable_entrypoints = vaapi_encode_entrypoints_normal;
 }
 
-desc = av_pix_fmt_desc_get(ctx->input_frames->sw_format);
+

[FFmpeg-devel] [PATCH v10 05/13] avcodec/vaapi_encode: move the dpb logic from VAAPI to base layer

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Move receive_packet function to base. This requires adding *alloc,
*issue, *output, *free as hardware callbacks. HWBaseEncodePicture is
introduced as the base layer structure. The related parameters in
VAAPIEncodeContext are also extracted to HWBaseEncodeContext. Then DPB
management logic can be fully extracted to base layer as-is.

Signed-off-by: Tong Wu 
---
 libavcodec/Makefile |   2 +-
 libavcodec/hw_base_encode.c | 597 
 libavcodec/hw_base_encode.h | 124 +
 libavcodec/vaapi_encode.c   | 793 +---
 libavcodec/vaapi_encode.h   | 102 +---
 libavcodec/vaapi_encode_av1.c   |  35 +-
 libavcodec/vaapi_encode_h264.c  |  84 ++--
 libavcodec/vaapi_encode_h265.c  |  53 ++-
 libavcodec/vaapi_encode_mjpeg.c |  13 +-
 libavcodec/vaapi_encode_mpeg2.c |  33 +-
 libavcodec/vaapi_encode_vp8.c   |  18 +-
 libavcodec/vaapi_encode_vp9.c   |  24 +-
 12 files changed, 988 insertions(+), 890 deletions(-)
 create mode 100644 libavcodec/hw_base_encode.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 2443d2c6fd..998f6b7e12 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -165,7 +165,7 @@ OBJS-$(CONFIG_STARTCODE)   += startcode.o
 OBJS-$(CONFIG_TEXTUREDSP)  += texturedsp.o
 OBJS-$(CONFIG_TEXTUREDSPENC)   += texturedspenc.o
 OBJS-$(CONFIG_TPELDSP) += tpeldsp.o
-OBJS-$(CONFIG_VAAPI_ENCODE)+= vaapi_encode.o
+OBJS-$(CONFIG_VAAPI_ENCODE)+= vaapi_encode.o hw_base_encode.o
 OBJS-$(CONFIG_AV1_AMF_ENCODER) += amfenc_av1.o
 OBJS-$(CONFIG_VC1DSP)  += vc1dsp.o
 OBJS-$(CONFIG_VIDEODSP)+= videodsp.o
diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
new file mode 100644
index 00..ec7178d2dc
--- /dev/null
+++ b/libavcodec/hw_base_encode.c
@@ -0,0 +1,597 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/log.h"
+#include "libavutil/mem.h"
+#include "libavutil/pixdesc.h"
+
+#include "encode.h"
+#include "avcodec.h"
+#include "hw_base_encode.h"
+
+static void hw_base_encode_add_ref(AVCodecContext *avctx,
+   HWBaseEncodePicture *pic,
+   HWBaseEncodePicture *target,
+   int is_ref, int in_dpb, int prev)
+{
+int refs = 0;
+
+if (is_ref) {
+av_assert0(pic != target);
+av_assert0(pic->nb_refs[0] < MAX_PICTURE_REFERENCES &&
+   pic->nb_refs[1] < MAX_PICTURE_REFERENCES);
+if (target->display_order < pic->display_order)
+pic->refs[0][pic->nb_refs[0]++] = target;
+else
+pic->refs[1][pic->nb_refs[1]++] = target;
+++refs;
+}
+
+if (in_dpb) {
+av_assert0(pic->nb_dpb_pics < MAX_DPB_SIZE);
+pic->dpb[pic->nb_dpb_pics++] = target;
+++refs;
+}
+
+if (prev) {
+av_assert0(!pic->prev);
+pic->prev = target;
+++refs;
+}
+
+target->ref_count[0] += refs;
+target->ref_count[1] += refs;
+}
+
+static void hw_base_encode_remove_refs(AVCodecContext *avctx,
+   HWBaseEncodePicture *pic,
+   int level)
+{
+int i;
+
+if (pic->ref_removed[level])
+return;
+
+for (i = 0; i < pic->nb_refs[0]; i++) {
+av_assert0(pic->refs[0][i]);
+--pic->refs[0][i]->ref_count[level];
+av_assert0(pic->refs[0][i]->ref_count[level] >= 0);
+}
+
+for (i = 0; i < pic->nb_refs[1]; i++) {
+av_assert0(pic->refs[1][i]);
+--pic->refs[1][i]->ref_count[level];
+av_assert0(pic->refs[1][i]->ref_count[level] >= 0);
+}
+
+for (i = 0; i < pic->nb_dpb_pics; i++) {
+av_assert0(pic->dpb[i]);
+--pic->dpb[i]->ref_count[level];
+av_assert0(pic->dpb[i]->ref_count[level] >= 0);
+}
+
+av_assert0(pic->prev || pic->type == PICTURE_TYPE_IDR);
+if (pic->prev) {
+--pic->prev->ref_count[level];
+av_assert0(pic->prev->ref_count[level] >= 0);
+}
+
+pic->ref_removed[l

[FFmpeg-devel] [PATCH v10 08/13] avcodec/vaapi_encode: extract set_output_property to base layer

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Signed-off-by: Tong Wu 
---
 libavcodec/hw_base_encode.c | 40 +
 libavcodec/hw_base_encode.h |  3 +++
 libavcodec/vaapi_encode.c   | 44 ++---
 3 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
index dfe20c4e67..df820c2f83 100644
--- a/libavcodec/hw_base_encode.c
+++ b/libavcodec/hw_base_encode.c
@@ -491,6 +491,46 @@ fail:
 return err;
 }
 
+int ff_hw_base_encode_set_output_property(AVCodecContext *avctx,
+  HWBaseEncodePicture *pic,
+  AVPacket *pkt, int flag_no_delay)
+{
+HWBaseEncodeContext *ctx = avctx->priv_data;
+
+if (pic->type == PICTURE_TYPE_IDR)
+pkt->flags |= AV_PKT_FLAG_KEY;
+
+pkt->pts = pic->pts;
+pkt->duration = pic->duration;
+
+// for no-delay encoders this is handled in generic codec
+if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
+avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
+pkt->opaque  = pic->opaque;
+pkt->opaque_ref  = pic->opaque_ref;
+pic->opaque_ref = NULL;
+}
+
+if (flag_no_delay) {
+pkt->dts = pkt->pts;
+return 0;
+}
+
+if (ctx->output_delay == 0) {
+pkt->dts = pkt->pts;
+} else if (pic->encode_order < ctx->decode_delay) {
+if (ctx->ts_ring[pic->encode_order] < INT64_MIN + ctx->dts_pts_diff)
+pkt->dts = INT64_MIN;
+else
+pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
+} else {
+pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
+(3 * ctx->output_delay + ctx->async_depth)];
+}
+
+return 0;
+}
+
 int ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
 HWBaseEncodeContext *ctx = avctx->priv_data;
diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
index 07936bf9bb..d363819329 100644
--- a/libavcodec/hw_base_encode.h
+++ b/libavcodec/hw_base_encode.h
@@ -203,6 +203,9 @@ typedef struct HWBaseEncodeContext {
 AVPacket*tail_pkt;
 } HWBaseEncodeContext;
 
+int ff_hw_base_encode_set_output_property(AVCodecContext *avctx, 
HWBaseEncodePicture *pic,
+  AVPacket *pkt, int flag_no_delay);
+
 int ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
 
 int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t ref_l0, 
uint32_t ref_l1,
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 4db64b686b..6ab1b633ed 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -660,47 +660,6 @@ fail_at_end:
 return err;
 }
 
-static int vaapi_encode_set_output_property(AVCodecContext *avctx,
-HWBaseEncodePicture *pic,
-AVPacket *pkt)
-{
-HWBaseEncodeContext *base_ctx = avctx->priv_data;
-VAAPIEncodeContext   *ctx = avctx->priv_data;
-
-if (pic->type == PICTURE_TYPE_IDR)
-pkt->flags |= AV_PKT_FLAG_KEY;
-
-pkt->pts = pic->pts;
-pkt->duration = pic->duration;
-
-// for no-delay encoders this is handled in generic codec
-if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
-avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
-pkt->opaque = pic->opaque;
-pkt->opaque_ref = pic->opaque_ref;
-pic->opaque_ref = NULL;
-}
-
-if (ctx->codec->flags & FLAG_TIMESTAMP_NO_DELAY) {
-pkt->dts = pkt->pts;
-return 0;
-}
-
-if (base_ctx->output_delay == 0) {
-pkt->dts = pkt->pts;
-} else if (pic->encode_order < base_ctx->decode_delay) {
-if (base_ctx->ts_ring[pic->encode_order] < INT64_MIN + 
base_ctx->dts_pts_diff)
-pkt->dts = INT64_MIN;
-else
-pkt->dts = base_ctx->ts_ring[pic->encode_order] - 
base_ctx->dts_pts_diff;
-} else {
-pkt->dts = base_ctx->ts_ring[(pic->encode_order - 
base_ctx->decode_delay) %
- (3 * base_ctx->output_delay + 
base_ctx->async_depth)];
-}
-
-return 0;
-}
-
 static int vaapi_encode_get_coded_buffer_size(AVCodecContext *avctx, 
VABufferID buf_id)
 {
 VAAPIEncodeContext *ctx = avctx->priv_data;
@@ -852,7 +811,8 @@ static int vaapi_encode_output(AVCodecContext *avctx,
 av_log(avctx, AV_LOG_DEBUG, "Output read for pic %"PRId64"/%"PRId64".\n",
base_pic->display_order, base_pic->encode_order);
 
-vaapi_encode_set_output_property(avctx, (HWBaseEncodePicture*)pic, 
pkt_ptr);
+ff_hw_base_encode_set_output_property(avctx, 
(HWBaseEncodePicture*)base_pic, pkt_ptr,
+  ctx->codec->flags & 
FLAG_TIMESTAMP_NO_DELAY);
 
 end:
 ff_refstruct_unref(&pic->output_buffer_ref);
-- 
2.41.0.windows.1

_

[FFmpeg-devel] [PATCH v10 07/13] avcodec/vaapi_encode: extract gop configuration and two options to base layer

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

idr_interval and desired_b_depth are moved to HW_BASE_ENCODE_COMMON_OPTIONS.

Signed-off-by: Tong Wu 
---
 libavcodec/hw_base_encode.c | 54 +
 libavcodec/hw_base_encode.h | 19 +
 libavcodec/vaapi_encode.c   | 52 +++
 libavcodec/vaapi_encode.h   | 16 ---
 4 files changed, 77 insertions(+), 64 deletions(-)

diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
index f2b6ef4a3a..dfe20c4e67 100644
--- a/libavcodec/hw_base_encode.c
+++ b/libavcodec/hw_base_encode.c
@@ -596,6 +596,60 @@ end:
 return 0;
 }
 
+int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t ref_l0, 
uint32_t ref_l1,
+  int flags, int prediction_pre_only)
+{
+HWBaseEncodeContext *ctx = avctx->priv_data;
+
+if (flags & FLAG_INTRA_ONLY || avctx->gop_size <= 1) {
+av_log(avctx, AV_LOG_VERBOSE, "Using intra frames only.\n");
+ctx->gop_size = 1;
+} else if (ref_l0 < 1) {
+av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
+   "reference frames.\n");
+return AVERROR(EINVAL);
+} else if (!(flags & FLAG_B_PICTURES) || ref_l1 < 1 ||
+   avctx->max_b_frames < 1 || prediction_pre_only) {
+if (ctx->p_to_gpb)
+   av_log(avctx, AV_LOG_VERBOSE, "Using intra and B-frames "
+  "(supported references: %d / %d).\n",
+  ref_l0, ref_l1);
+else
+av_log(avctx, AV_LOG_VERBOSE, "Using intra and P-frames "
+   "(supported references: %d / %d).\n", ref_l0, ref_l1);
+ctx->gop_size = avctx->gop_size;
+ctx->p_per_i  = INT_MAX;
+ctx->b_per_p  = 0;
+} else {
+   if (ctx->p_to_gpb)
+   av_log(avctx, AV_LOG_VERBOSE, "Using intra and B-frames "
+  "(supported references: %d / %d).\n",
+  ref_l0, ref_l1);
+   else
+   av_log(avctx, AV_LOG_VERBOSE, "Using intra, P- and B-frames "
+  "(supported references: %d / %d).\n", ref_l0, ref_l1);
+ctx->gop_size = avctx->gop_size;
+ctx->p_per_i  = INT_MAX;
+ctx->b_per_p  = avctx->max_b_frames;
+if (flags & FLAG_B_PICTURE_REFERENCES) {
+ctx->max_b_depth = FFMIN(ctx->desired_b_depth,
+ av_log2(ctx->b_per_p) + 1);
+} else {
+ctx->max_b_depth = 1;
+}
+}
+
+if (flags & FLAG_NON_IDR_KEY_PICTURES) {
+ctx->closed_gop  = !!(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP);
+ctx->gop_per_idr = ctx->idr_interval + 1;
+} else {
+ctx->closed_gop  = 1;
+ctx->gop_per_idr = 1;
+}
+
+return 0;
+}
+
 int ff_hw_base_encode_init(AVCodecContext *avctx)
 {
 HWBaseEncodeContext *ctx = avctx->priv_data;
diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
index 7e20f57b9c..07936bf9bb 100644
--- a/libavcodec/hw_base_encode.h
+++ b/libavcodec/hw_base_encode.h
@@ -119,6 +119,14 @@ typedef struct HWBaseEncodeContext {
 // Hardware-specific hooks.
 const struct HWEncodePictureOperation *op;
 
+// Global options.
+
+// Number of I frames between IDR frames.
+int idr_interval;
+
+// Desired B frame reference depth.
+int desired_b_depth;
+
 // The hardware device context.
 AVBufferRef*device_ref;
 AVHWDeviceContext *device;
@@ -197,11 +205,22 @@ typedef struct HWBaseEncodeContext {
 
 int ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
 
+int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t ref_l0, 
uint32_t ref_l1,
+  int flags, int prediction_pre_only);
+
 int ff_hw_base_encode_init(AVCodecContext *avctx);
 
 int ff_hw_base_encode_close(AVCodecContext *avctx);
 
 #define HW_BASE_ENCODE_COMMON_OPTIONS \
+{ "idr_interval", \
+  "Distance (in I-frames) between key frames", \
+  OFFSET(common.base.idr_interval), AV_OPT_TYPE_INT, \
+  { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
+{ "b_depth", \
+  "Maximum B-frame reference depth", \
+  OFFSET(common.base.desired_b_depth), AV_OPT_TYPE_INT, \
+  { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
 { "async_depth", "Maximum processing parallelism. " \
   "Increase this to improve single channel performance.", \
   OFFSET(common.base.async_depth), AV_OPT_TYPE_INT, \
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index c3ab0fc192..4db64b686b 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -1638,7 +1638,7 @@ static av_cold int 
vaapi_encode_init_gop_structure(AVCodecContext *avctx)
 VAStatus vas;
 VAConfigAttrib attr = { VAConfigAttribEncMaxRefFrames };
 uint32_t ref_l0, ref_l1;
-int prediction_pre_only;
+int prediction_pre_only, err;
 
 vas = vaGetConfigAttributes(ctx->hwctx->display,

[FFmpeg-devel] [PATCH v10 04/13] avcodec/vaapi_encode: move pic->input_surface initialization to encode_alloc

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

When allocating the VAAPIEncodePicture, pic->input_surface can be
initialized right there. This move simplifies the send_frame logic and
prepares for moving vaapi_encode_send_frame to the base layer.

Signed-off-by: Tong Wu 
---
 libavcodec/vaapi_encode.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 2d22e4bd85..227cccae64 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -878,7 +878,8 @@ static int vaapi_encode_discard(AVCodecContext *avctx,
 return 0;
 }
 
-static VAAPIEncodePicture *vaapi_encode_alloc(AVCodecContext *avctx)
+static VAAPIEncodePicture *vaapi_encode_alloc(AVCodecContext *avctx,
+  const AVFrame *frame)
 {
 VAAPIEncodeContext *ctx = avctx->priv_data;
 VAAPIEncodePicture *pic;
@@ -895,7 +896,7 @@ static VAAPIEncodePicture 
*vaapi_encode_alloc(AVCodecContext *avctx)
 }
 }
 
-pic->input_surface = VA_INVALID_ID;
+pic->input_surface = (VASurfaceID)(uintptr_t)frame->data[3];
 pic->recon_surface = VA_INVALID_ID;
 pic->output_buffer = VA_INVALID_ID;
 
@@ -1332,7 +1333,7 @@ static int vaapi_encode_send_frame(AVCodecContext *avctx, 
AVFrame *frame)
 if (err < 0)
 return err;
 
-pic = vaapi_encode_alloc(avctx);
+pic = vaapi_encode_alloc(avctx, frame);
 if (!pic)
 return AVERROR(ENOMEM);
 
@@ -1345,7 +1346,6 @@ static int vaapi_encode_send_frame(AVCodecContext *avctx, 
AVFrame *frame)
 if (ctx->input_order == 0 || frame->pict_type == AV_PICTURE_TYPE_I)
 pic->force_idr = 1;
 
-pic->input_surface = (VASurfaceID)(uintptr_t)frame->data[3];
 pic->pts = frame->pts;
 pic->duration = frame->duration;
 
-- 
2.41.0.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v10 03/13] avcodec/vaapi_encode: add picture type name to base

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Signed-off-by: Tong Wu 
---
 libavcodec/hw_base_encode.h | 5 +
 libavcodec/vaapi_encode.c   | 4 +---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
index 5272f2836d..a578db8c06 100644
--- a/libavcodec/hw_base_encode.h
+++ b/libavcodec/hw_base_encode.h
@@ -25,6 +25,11 @@
 #define MAX_ASYNC_DEPTH 64
 #define MAX_REFERENCE_LIST_NUM 2
 
+static inline const char *ff_hw_base_encode_get_pictype_name(const int type) {
+const char * const picture_type_name[] = { "IDR", "I", "P", "B" };
+return picture_type_name[type];
+}
+
 enum {
 PICTURE_TYPE_IDR = 0,
 PICTURE_TYPE_I   = 1,
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 9373512417..2d22e4bd85 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -38,8 +38,6 @@ const AVCodecHWConfigInternal *const 
ff_vaapi_encode_hw_configs[] = {
 NULL,
 };
 
-static const char * const picture_type_name[] = { "IDR", "I", "P", "B" };
-
 static int vaapi_encode_make_packed_header(AVCodecContext *avctx,
VAAPIEncodePicture *pic,
int type, char *data, size_t 
bit_len)
@@ -277,7 +275,7 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
 
 av_log(avctx, AV_LOG_DEBUG, "Issuing encode for pic %"PRId64"/%"PRId64" "
"as type %s.\n", pic->display_order, pic->encode_order,
-   picture_type_name[pic->type]);
+   ff_hw_base_encode_get_pictype_name(pic->type));
 if (pic->nb_refs[0] == 0 && pic->nb_refs[1] == 0) {
 av_log(avctx, AV_LOG_DEBUG, "No reference pictures.\n");
 } else {
-- 
2.41.0.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v10 02/13] avcodec/vaapi_encode: add async_depth to common options

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Signed-off-by: Tong Wu 
---
 libavcodec/hw_base_encode.h | 10 +-
 libavcodec/vaapi_encode.c   | 13 -
 libavcodec/vaapi_encode.h   |  7 ---
 libavcodec/vaapi_encode_av1.c   |  1 +
 libavcodec/vaapi_encode_h264.c  |  1 +
 libavcodec/vaapi_encode_h265.c  |  1 +
 libavcodec/vaapi_encode_mjpeg.c |  1 +
 libavcodec/vaapi_encode_mpeg2.c |  1 +
 libavcodec/vaapi_encode_vp8.c   |  1 +
 libavcodec/vaapi_encode_vp9.c   |  1 +
 10 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
index 1996179456..5272f2836d 100644
--- a/libavcodec/hw_base_encode.h
+++ b/libavcodec/hw_base_encode.h
@@ -50,7 +50,15 @@ enum {
 
 typedef struct HWBaseEncodeContext {
 const AVClass *class;
+
+// Max number of frame buffered in encoder.
+int async_depth;
 } HWBaseEncodeContext;
 
-#endif /* AVCODEC_HW_BASE_ENCODE_H */
+#define HW_BASE_ENCODE_COMMON_OPTIONS \
+{ "async_depth", "Maximum processing parallelism. " \
+  "Increase this to improve single channel performance.", \
+  OFFSET(common.base.async_depth), AV_OPT_TYPE_INT, \
+  { .i64 = 2 }, 1, MAX_ASYNC_DEPTH, FLAGS }
 
+#endif /* AVCODEC_HW_BASE_ENCODE_H */
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index f54b2579ec..9373512417 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -669,7 +669,8 @@ static int vaapi_encode_set_output_property(AVCodecContext 
*avctx,
 VAAPIEncodePicture *pic,
 AVPacket *pkt)
 {
-VAAPIEncodeContext *ctx = avctx->priv_data;
+HWBaseEncodeContext *base_ctx = avctx->priv_data;
+VAAPIEncodeContext   *ctx = avctx->priv_data;
 
 if (pic->type == PICTURE_TYPE_IDR)
 pkt->flags |= AV_PKT_FLAG_KEY;
@@ -699,7 +700,7 @@ static int vaapi_encode_set_output_property(AVCodecContext 
*avctx,
 pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
 } else {
 pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
-(3 * ctx->output_delay + ctx->async_depth)];
+(3 * ctx->output_delay + 
base_ctx->async_depth)];
 }
 
 return 0;
@@ -1320,6 +1321,7 @@ static int vaapi_encode_check_frame(AVCodecContext *avctx,
 
 static int vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
 {
+HWBaseEncodeContext *base_ctx = avctx->priv_data;
 VAAPIEncodeContext *ctx = avctx->priv_data;
 VAAPIEncodePicture *pic;
 int err;
@@ -1365,7 +1367,7 @@ static int vaapi_encode_send_frame(AVCodecContext *avctx, 
AVFrame *frame)
 ctx->dts_pts_diff = pic->pts - ctx->first_pts;
 if (ctx->output_delay > 0)
 ctx->ts_ring[ctx->input_order %
-(3 * ctx->output_delay + ctx->async_depth)] = pic->pts;
+(3 * ctx->output_delay + base_ctx->async_depth)] = 
pic->pts;
 
 pic->display_order = ctx->input_order;
 ++ctx->input_order;
@@ -2773,7 +2775,8 @@ static av_cold int 
vaapi_encode_create_recon_frames(AVCodecContext *avctx)
 
 av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
 {
-VAAPIEncodeContext *ctx = avctx->priv_data;
+HWBaseEncodeContext *base_ctx = avctx->priv_data;
+VAAPIEncodeContext   *ctx = avctx->priv_data;
 AVVAAPIFramesContext *recon_hwctx = NULL;
 VAStatus vas;
 int err;
@@ -2966,7 +2969,7 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
 vas = vaSyncBuffer(ctx->hwctx->display, VA_INVALID_ID, 0);
 if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
 ctx->has_sync_buffer_func = 1;
-ctx->encode_fifo = av_fifo_alloc2(ctx->async_depth,
+ctx->encode_fifo = av_fifo_alloc2(base_ctx->async_depth,
   sizeof(VAAPIEncodePicture *),
   0);
 if (!ctx->encode_fifo)
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index f5c9be8973..02410c72ec 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -374,8 +374,6 @@ typedef struct VAAPIEncodeContext {
 int has_sync_buffer_func;
 // Store buffered pic
 AVFifo  *encode_fifo;
-// Max number of frame buffered in encoder.
-int async_depth;
 
 /** Head data for current output pkt, used only for AV1. */
 //void  *header_data;
@@ -491,11 +489,6 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
   "Maximum B-frame reference depth", \
   OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
   { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
-{ "async_depth", "Maximum processing parallelism. " \
-  "Increase this to improve single channel performance. This option " \
-  "doesn't work if driver doesn't implement vaSyncBuffer function.", \
-  OFFSET(common.async_depth), A

[FFmpeg-devel] [PATCH v10 01/13] avcodec/vaapi_encode: introduce a base layer for vaapi encode

2024-05-21 Thread tong1 . wu-at-intel . com
From: Tong Wu 

Since VAAPI and future D3D12VA implementation may share some common parameters,
a base layer encode context is introduced as vaapi context's base.

Signed-off-by: Tong Wu 
---
 libavcodec/hw_base_encode.h | 56 +
 libavcodec/vaapi_encode.h   | 39 +-
 2 files changed, 63 insertions(+), 32 deletions(-)
 create mode 100644 libavcodec/hw_base_encode.h

diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
new file mode 100644
index 00..1996179456
--- /dev/null
+++ b/libavcodec/hw_base_encode.h
@@ -0,0 +1,56 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HW_BASE_ENCODE_H
+#define AVCODEC_HW_BASE_ENCODE_H
+
+#define MAX_DPB_SIZE 16
+#define MAX_PICTURE_REFERENCES 2
+#define MAX_REORDER_DELAY 16
+#define MAX_ASYNC_DEPTH 64
+#define MAX_REFERENCE_LIST_NUM 2
+
+enum {
+PICTURE_TYPE_IDR = 0,
+PICTURE_TYPE_I   = 1,
+PICTURE_TYPE_P   = 2,
+PICTURE_TYPE_B   = 3,
+};
+
+enum {
+// Codec supports controlling the subdivision of pictures into slices.
+FLAG_SLICE_CONTROL = 1 << 0,
+// Codec only supports constant quality (no rate control).
+FLAG_CONSTANT_QUALITY_ONLY = 1 << 1,
+// Codec is intra-only.
+FLAG_INTRA_ONLY= 1 << 2,
+// Codec supports B-pictures.
+FLAG_B_PICTURES= 1 << 3,
+// Codec supports referencing B-pictures.
+FLAG_B_PICTURE_REFERENCES  = 1 << 4,
+// Codec supports non-IDR key pictures (that is, key pictures do
+// not necessarily empty the DPB).
+FLAG_NON_IDR_KEY_PICTURES  = 1 << 5,
+};
+
+typedef struct HWBaseEncodeContext {
+const AVClass *class;
+} HWBaseEncodeContext;
+
+#endif /* AVCODEC_HW_BASE_ENCODE_H */
+
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index 0eed9691ca..f5c9be8973 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -33,34 +33,27 @@
 
 #include "avcodec.h"
 #include "hwconfig.h"
+#include "hw_base_encode.h"
 
 struct VAAPIEncodeType;
 struct VAAPIEncodePicture;
 
+// Codec output packet without timestamp delay, which means the
+// output packet has same PTS and DTS.
+#define FLAG_TIMESTAMP_NO_DELAY 1 << 6
+
 enum {
 MAX_CONFIG_ATTRIBUTES  = 4,
 MAX_GLOBAL_PARAMS  = 4,
-MAX_DPB_SIZE   = 16,
-MAX_PICTURE_REFERENCES = 2,
-MAX_REORDER_DELAY  = 16,
 MAX_PARAM_BUFFER_SIZE  = 1024,
 // A.4.1: table A.6 allows at most 22 tile rows for any level.
 MAX_TILE_ROWS  = 22,
 // A.4.1: table A.6 allows at most 20 tile columns for any level.
 MAX_TILE_COLS  = 20,
-MAX_ASYNC_DEPTH= 64,
-MAX_REFERENCE_LIST_NUM = 2,
 };
 
 extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[];
 
-enum {
-PICTURE_TYPE_IDR = 0,
-PICTURE_TYPE_I   = 1,
-PICTURE_TYPE_P   = 2,
-PICTURE_TYPE_B   = 3,
-};
-
 typedef struct VAAPIEncodeSlice {
 int index;
 int row_start;
@@ -193,7 +186,8 @@ typedef struct VAAPIEncodeRCMode {
 } VAAPIEncodeRCMode;
 
 typedef struct VAAPIEncodeContext {
-const AVClass *class;
+// Base context.
+HWBaseEncodeContext base;
 
 // Codec-specific hooks.
 const struct VAAPIEncodeType *codec;
@@ -397,25 +391,6 @@ typedef struct VAAPIEncodeContext {
 AVPacket*tail_pkt;
 } VAAPIEncodeContext;
 
-enum {
-// Codec supports controlling the subdivision of pictures into slices.
-FLAG_SLICE_CONTROL = 1 << 0,
-// Codec only supports constant quality (no rate control).
-FLAG_CONSTANT_QUALITY_ONLY = 1 << 1,
-// Codec is intra-only.
-FLAG_INTRA_ONLY= 1 << 2,
-// Codec supports B-pictures.
-FLAG_B_PICTURES= 1 << 3,
-// Codec supports referencing B-pictures.
-FLAG_B_PICTURE_REFERENCES  = 1 << 4,
-// Codec supports non-IDR key pictures (that is, key pictures do
-// not necessarily empty the DPB).
-FLAG_NON_IDR_KEY_PICTURES  = 1 << 5,
-// Codec output packet without timestamp delay, which means the
-// output packet has same PTS and DTS.
-FLAG_TIMESTAMP_NO_DELAY= 1 << 6,
-};
-
 typedef struct VAAPIEncodeType {
 // List of supported profiles and correspond

Re: [FFmpeg-devel] [PATCH v5 2/2][GSoC 2024] tests/checkasm: Add check_vvc_sad to vvc_mc.c

2024-05-21 Thread Ronald S. Bultje
Hi,

On Tue, May 21, 2024 at 8:01 PM Stone Chen  wrote:

> Adds checkasm for DMVR SAD AVX2 implementation.
>
> Benchmarks ( AMD 7940HS )
> vvc_sad_8x8_c: 50.3
> vvc_sad_8x8_avx2: 0.3
> vvc_sad_16x16_c: 250.3
> vvc_sad_16x16_avx2: 10.3
> vvc_sad_32x32_c: 1020.3
> vvc_sad_32x32_avx2: 60.3
> vvc_sad_64x64_c: 3850.3
> vvc_sad_64x64_avx2: 220.3
> vvc_sad_128x128_c: 14100.3
> vvc_sad_128x128_avx2: 840.3
> ---
>  tests/checkasm/vvc_mc.c | 38 ++
>  1 file changed, 38 insertions(+)
>

LGTM.

Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v5 1/2][GSoC 2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC

2024-05-21 Thread Ronald S. Bultje
Hi,

On Tue, May 21, 2024 at 8:01 PM Stone Chen  wrote:

> Implements AVX2 DMVR (decoder-side motion vector refinement) SAD
> functions. DMVR SAD is only calculated if w >= 8, h >= 8, and w * h > 128.
> To reduce complexity, SAD is only calculated on even rows. This is
> calculated for all video bitdepths, but the values passed to the function
> are always 16bit (even if the original video bitdepth is 8). The AVX2
> implementation uses min/max/sub.
>
> Additionally this changes parameters dx and dy from int to intptr_t. This
> allows dx & dy to be used as pointer offsets without needing to use movsxd.
>
> Benchmarks ( AMD 7940HS )
> Before:
> BQTerrace_1920x1080_60_10_420_22_RA.vvc | 106.0 |
> Chimera_8bit_1080P_1000_frames.vvc | 204.3 |
> NovosobornayaSquare_1920x1080.bin | 197.3 |
> RitualDance_1920x1080_60_10_420_37_RA.266 | 174.0 |
>
> After:
> BQTerrace_1920x1080_60_10_420_22_RA.vvc | 109.3 |
> Chimera_8bit_1080P_1000_frames.vvc | 216.0 |
> NovosobornayaSquare_1920x1080.bin | 204.0 |
> RitualDance_1920x1080_60_10_420_37_RA.266 | 181.7 |
> ---
>  libavcodec/vvc/dsp.c |   2 +-
>  libavcodec/vvc/dsp.h |   2 +-
>  libavcodec/x86/vvc/Makefile  |   3 +-
>  libavcodec/x86/vvc/vvc_sad.asm   | 130 +++
>  libavcodec/x86/vvc/vvcdsp_init.c |   6 ++
>  5 files changed, 140 insertions(+), 3 deletions(-)
>  create mode 100644 libavcodec/x86/vvc/vvc_sad.asm
>

LGTM.

Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v4 1/2][GSoC 2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC

2024-05-21 Thread Stone Chen
On Mon, May 20, 2024 at 7:23 AM Ronald S. Bultje  wrote:

> Hi,
>
> This is mostly good; the following are tiny nitpicks.
>
> On Sun, May 19, 2024 at 8:46 PM Stone Chen 
> wrote:
>
>> +%macro INIT_OFFSET 6 ; src1, src2, dxq, dyq, off1, off2
>>
>
> The macro is only used once, so you could inline it in the calling
> function.
>
>>
>> +imul%5, 128
>> +imul%6, 128
>>
>
> I believe shl is typically preferred over imul for powers of two.
>
>
>> +add %5, 2
>> +add %6, 2
>>
>
> And these can be integrated as a constant offset in the lea below (lea %1,
> [%1 + %5 * 2 + 2 * 2], same for %2).
>
>
>> +add %5, %3
>> +sub %6, %3
>> +
>> +lea %1, [%1 + %5 * 2]
>> +lea %2, [%2 + %6 * 2]
>
> [..]
>
>> +cglobal vvc_sad, 6, 11, 5, src1, src2, dx, dy, block_w, block_h, off1,
>> off2, row_idx, dx2, dy2
>> +movsxd   dx2q, dxd
>> +movsxd   dy2q, dyd
>>
>
> If you change the argument type from int to intptr_t, this is not
> necessary anymore.
>
>
>> +vvc_sad_16_128:
>> +.loop_height:
>> +mov off1q, src1q
>> +mov off2q, src2q
>> +mov  row_idxd, block_wd
>> +sar  row_idxd, 4
>>
>
> You could right-shift block_wd by 4 outside the loop (before .loop_height).
>
> Ronald
>

On Mon, May 20, 2024 at 11:53 AM Ronald S. Bultje 
wrote:

> Hi,
>
> one more, I forgot.
>
> On Sun, May 19, 2024 at 8:46 PM Stone Chen 
> wrote:
>
>> +pw_1: dw 1
>>
> [..]
>
>> +vpbroadcastw   m4, [pw_1]
>>
>
> We typically suggest using vpbroadcastd, not w (and then pw_1: times 2 dw
> 1). Agner shows that on e.g. Haswell, the former (d) is 1 uop with 5
> cycles latency, whereas the latter (w) is 3 uops with 7 cycles latency, or
> more generally d is faster than w.
>
> Ronald
>

Hi Ronald,

I've sent a v5 incorporating all the above, thank you for the feedback!

-Stone
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 13/13] avformat/flvdec: support all multi-track modes

2024-05-21 Thread Michael Niedermayer
On Tue, May 21, 2024 at 11:02:22AM +0200, Timo Rothenpieler wrote:
> ---
>  libavformat/flvdec.c | 570 +++
>  1 file changed, 306 insertions(+), 264 deletions(-)

infinite loops

[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmpeg.org)
[flv @ 0x555e803d2940] Video codec (0) is not implemented. Update your FFmpeg 
version to the newest one from Git. If the problem still occurs, it means that 
your file has a feature which has not been implemented.
[flv @ 0x555e803d2940] If you want to help, upload a sample of this file to 
https://streams.videolan.org/upload/ and contact the ffmpeg-devel mailing list. 
(ffmpeg-devel@ffmp

[FFmpeg-devel] [PATCH v5 2/2][GSoC 2024] tests/checkasm: Add check_vvc_sad to vvc_mc.c

2024-05-21 Thread Stone Chen
Adds checkasm for DMVR SAD AVX2 implementation.

Benchmarks ( AMD 7940HS )
vvc_sad_8x8_c: 50.3
vvc_sad_8x8_avx2: 0.3
vvc_sad_16x16_c: 250.3
vvc_sad_16x16_avx2: 10.3
vvc_sad_32x32_c: 1020.3
vvc_sad_32x32_avx2: 60.3
vvc_sad_64x64_c: 3850.3
vvc_sad_64x64_avx2: 220.3
vvc_sad_128x128_c: 14100.3
vvc_sad_128x128_avx2: 840.3
---
 tests/checkasm/vvc_mc.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
index 97f57cb401..f2d7a6d561 100644
--- a/tests/checkasm/vvc_mc.c
+++ b/tests/checkasm/vvc_mc.c
@@ -322,8 +322,46 @@ static void check_avg(void)
 report("avg");
 }
 
+static void check_vvc_sad(void)
+{
+const int bit_depth = 10;
+VVCDSPContext c;
+LOCAL_ALIGNED_32(uint16_t, src0, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]);
+LOCAL_ALIGNED_32(uint16_t, src1, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]);
+declare_func(int, const int16_t *src0, const int16_t *src1, intptr_t dx, 
intptr_t dy, int block_w, int block_h);
+
+ff_vvc_dsp_init(&c, bit_depth);
+memset(src0, 0, MAX_CTU_SIZE * MAX_CTU_SIZE * 4);
+memset(src1, 0, MAX_CTU_SIZE * MAX_CTU_SIZE * 4);
+
+randomize_pixels(src0, src1, MAX_CTU_SIZE * MAX_CTU_SIZE * 2);
+ for (int h = 8; h <= MAX_CTU_SIZE; h *= 2) {
+for (int w = 8; w <= MAX_CTU_SIZE; w *= 2) {
+for(int offy = 0; offy <= 4; offy++) {
+for(int offx = 0; offx <= 4; offx++) {
+if(check_func(c.inter.sad, "vvc_sad_%dx%d", w, h)) {
+int result0;
+int result1;
+
+result0 =  call_ref(src0 + PIXEL_STRIDE * 2 + 2, src1 
+ PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
+result1 =  call_new(src0 + PIXEL_STRIDE * 2 + 2, src1 
+ PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
+
+if (result1 != result0)
+fail();
+if(w == h && offx == 0 && offy == 0)
+bench_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + 
PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
+}
+}
+}
+}
+ }
+
+report("check_vvc_sad");
+}
+
 void checkasm_check_vvc_mc(void)
 {
+check_vvc_sad();
 check_put_vvc_luma();
 check_put_vvc_luma_uni();
 check_put_vvc_chroma();
-- 
2.45.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v5 1/2][GSoC 2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC

2024-05-21 Thread Stone Chen
Implements AVX2 DMVR (decoder-side motion vector refinement) SAD functions. 
DMVR SAD is only calculated if w >= 8, h >= 8, and w * h > 128. To reduce 
complexity, SAD is only calculated on even rows. This is calculated for all 
video bitdepths, but the values passed to the function are always 16bit (even 
if the original video bitdepth is 8). The AVX2 implementation uses min/max/sub.

Additionally this changes parameters dx and dy from int to intptr_t. This 
allows dx & dy to be used as pointer offsets without needing to use movsxd.

Benchmarks ( AMD 7940HS )
Before:
BQTerrace_1920x1080_60_10_420_22_RA.vvc | 106.0 |
Chimera_8bit_1080P_1000_frames.vvc | 204.3 |
NovosobornayaSquare_1920x1080.bin | 197.3 |
RitualDance_1920x1080_60_10_420_37_RA.266 | 174.0 |

After:
BQTerrace_1920x1080_60_10_420_22_RA.vvc | 109.3 |
Chimera_8bit_1080P_1000_frames.vvc | 216.0 |
NovosobornayaSquare_1920x1080.bin | 204.0 |
RitualDance_1920x1080_60_10_420_37_RA.266 | 181.7 |
---
 libavcodec/vvc/dsp.c |   2 +-
 libavcodec/vvc/dsp.h |   2 +-
 libavcodec/x86/vvc/Makefile  |   3 +-
 libavcodec/x86/vvc/vvc_sad.asm   | 130 +++
 libavcodec/x86/vvc/vvcdsp_init.c |   6 ++
 5 files changed, 140 insertions(+), 3 deletions(-)
 create mode 100644 libavcodec/x86/vvc/vvc_sad.asm

diff --git a/libavcodec/vvc/dsp.c b/libavcodec/vvc/dsp.c
index 41e830a98a..aded1a2f9f 100644
--- a/libavcodec/vvc/dsp.c
+++ b/libavcodec/vvc/dsp.c
@@ -46,7 +46,7 @@ static void av_always_inline pad_int16(int16_t *_dst, const 
ptrdiff_t dst_stride
 memcpy(_dst, _dst - dst_stride, padded_width * sizeof(int16_t));
 }
 
-static int vvc_sad(const int16_t *src0, const int16_t *src1, int dx, int dy,
+static int vvc_sad(const int16_t *src0, const int16_t *src1, intptr_t dx, 
intptr_t dy,
 const int block_w, const int block_h)
 {
 int sad = 0;
diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index 9810ac314c..213337358b 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -86,7 +86,7 @@ typedef struct VVCInterDSPContext {
 
 void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t *src0, 
int16_t *src1, int block_w, int block_h);
 
-int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int 
block_w, int block_h);
+int (*sad)(const int16_t *src0, const int16_t *src1, intptr_t dx, intptr_t 
dy, int block_w, int block_h);
 void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, 
int height,
 intptr_t mx, intptr_t my, int width);
 } VVCInterDSPContext;
diff --git a/libavcodec/x86/vvc/Makefile b/libavcodec/x86/vvc/Makefile
index d6a66f860a..7b2438ce17 100644
--- a/libavcodec/x86/vvc/Makefile
+++ b/libavcodec/x86/vvc/Makefile
@@ -5,4 +5,5 @@ OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/vvcdsp_init.o 
\
   x86/h26x/h2656dsp.o
 X86ASM-OBJS-$(CONFIG_VVC_DECODER)  += x86/vvc/vvc_alf.o  \
   x86/vvc/vvc_mc.o   \
-  x86/h26x/h2656_inter.o
+  x86/vvc/vvc_sad.o  \
+  x86/h26x/h2656_inter.o 
diff --git a/libavcodec/x86/vvc/vvc_sad.asm b/libavcodec/x86/vvc/vvc_sad.asm
new file mode 100644
index 00..9766446b11
--- /dev/null
+++ b/libavcodec/x86/vvc/vvc_sad.asm
@@ -0,0 +1,130 @@
+; /*
+; * Provide SIMD DMVR SAD functions for VVC decoding
+; *
+; * Copyright (c) 2024 Stone Chen
+; *
+; * This file is part of FFmpeg.
+; *
+; * FFmpeg is free software; you can redistribute it and/or
+; * modify it under the terms of the GNU Lesser General Public
+; * License as published by the Free Software Foundation; either
+; * version 2.1 of the License, or (at your option) any later version.
+; *
+; * FFmpeg is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; * Lesser General Public License for more details.
+; *
+; * You should have received a copy of the GNU Lesser General Public
+; * License along with FFmpeg; if not, write to the Free Software
+; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
+; */
+
+%include "libavutil/x86/x86util.asm"
+%define MAX_PB_SIZE 128
+%define ROWS 2
+
+SECTION_RODATA
+
+pw_1: times 2 dw 1
+
+; DMVR SAD is only calculated on even rows to reduce complexity
+SECTION .text
+
+%macro MIN_MAX_SAD 3 ; 
+pminuw   %3, %2, %1
+pmaxuw   %1, %2, %1
+psubusw  %1, %1, %3
+%endmacro
+
+%macro HORIZ_ADD 3  ; xm0, xm1, m1
+vextracti128 %1, %3, q0001  ;32  1  0
+paddd%1, %2 ; xm0 (7 + 3) (6 + 2) (5 + 1)   (4 + 0)
+pshufd   %2, %1, q0032  ; xm1-  - (7 + 3)   (6 + 2)
+paddd%1, %1, %2 ; xm0_  _ (5 1 7 3

Re: [FFmpeg-devel] [PATCH v2 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

2024-05-21 Thread Lynne via ffmpeg-devel

On 21/05/2024 23:33, Hendrik Leppkes wrote:

On Tue, May 21, 2024 at 9:52 PM Lynne via ffmpeg-devel
 wrote:



It should be the case here, we shouldn't need reordering as NATIVE just
lets you specify what order the elements appear in the bitstream.


NATIVE means "the FFmpeg native ordering", not "bitstream order".
CUSTOM lets you specify an arbitrary order but requires metadata to
that effect, but it makes it particularly hard to map to any standard
when playing or transcoding, so some effort to try to unify it into a
NATIVE format is always appreciated if possible.


Right, I forgot about that, thanks.
Amended in my git repo to use Marton's code.


OpenPGP_0xA2FEA5F03F034464.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [RFC] STF 2025

2024-05-21 Thread Thilo Borgmann via ffmpeg-devel



On 21.05.24 21:43, Rémi Denis-Courmont wrote:

Le tiistaina 21. toukokuuta 2024, 22.42.00 EEST Rémi Denis-Courmont a écrit :

And "I hope you realise that you are arguing for" Intel, Loongson, etc.
employees to stop reviewing patches.


P.S.: And FFlabs too, since it is a for-profit company.


Same remark as in the previous mail. I'm not sure what you mean by that 
whole thing. Please elaborate / put it in other words.


-Thilo
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [RFC] STF 2025

2024-05-21 Thread Thilo Borgmann via ffmpeg-devel



On 21.05.24 21:42, Rémi Denis-Courmont wrote:

Le tiistaina 21. toukokuuta 2024, 21.43.44 EEST Thilo Borgmann via ffmpeg-devel
a écrit :

Same as above about that we should and STF would.
Especially since no corporate interest usually pays anyone for these
tasks


Sadly true, but...


(in case of reviews it might of course be considered a good thing).


I think some review is better than none. There may be conflict of
interests, but they are weighed by the risk of being caught abusing the
review process.

I hope you realize what you argue in favor of.


Yes. It's quoted above.

Are you claiming that *no* review is better than *some* review done in
*public* for all to see by a paid professional just because the person is
maybe biased?

First, even volunteers have their own biases. Any expert should have opinions
from their experience, and that by definition makes them "biased".

And second, you can't have it both ways. Either we want people to be paid for
review, and they will be answerable to their sponsor, or we want people to
continue to work on their free time.


I think that is what you don't understand.
An STF sponsorship for review would not introduce any bias in favor or 
against some patch or sth related.
A company sponsorship would as it would introduce a bias towards 'we 
want our stuff in'.

STF has no stuff they want to be reviewed on their behalf.
They are only in favor of stuff being reviewed.



STF is an agency of the German government, applying German government
policies. They certainly do seem to have their own biases, including on tech,
e.g.: https://www.theregister.com/2024/05/20/huawei_germany_ban/ to take just
the most recent example to come to mind.


No. Does not apply to any funding we might get.



Reviews need to be unbiased and independent.


Ideally so but that's the land of utopia.


Of course, we talk about what should be, don't we?



STF sponsoring reviews could be an excellent help towards this.


If STF is willing to sponsor reviews, that's welcome. But that would certainly
not be "independent".


It would. As STF would not send patches we'd be obliged to review.
They'd give us money just for the sake of review 'whatever comes our way'.



Corporate influence on the review process already happened in the past
and the chance of getting caught is almost zero.


So how do you know that it happened if it does not get caught?


I assume you mean how I know that and the guilty ones did not get 
caught? Well, they did. I will not give an answer in public.




And "I hope you realise that you are arguing for" Intel, Loongson, etc.
employees to stop reviewing patches.


Syntax error. What exactly do you mean?
According to my assumptions: No, I value reviews of company employees 
in general which have been proven to be useful and unbiased e.g. in 
getting part of the community reviewing 'stuff' but not their 'own stuff'.


-Thilo
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

2024-05-21 Thread Hendrik Leppkes
On Tue, May 21, 2024 at 9:52 PM Lynne via ffmpeg-devel
 wrote:
>
>
> It should be the case here, we shouldn't need reordering as NATIVE just
> lets you specify what order the elements appear in the bitstream.

NATIVE means "the FFmpeg native ordering", not "bitstream order".
CUSTOM lets you specify an arbitrary order but requires metadata to
that effect, but it makes it particularly hard to map to any standard
when playing or transcoding, so some efforts to try to unify it into a
NATIVE format is always appreciated if possible.

- Hendrik
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

2024-05-21 Thread Marton Balint



On Tue, 21 May 2024, Lynne via ffmpeg-devel wrote:


On 21/05/2024 21:40, Marton Balint wrote:



 On Tue, 21 May 2024, Lynne via ffmpeg-devel wrote:


 On 21/05/2024 09:16, Marton Balint wrote:



  On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:


  On 19/05/2024 21:39, Marton Balint wrote:



   On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:


   This commit adds a decoder for the frequency-domain part of USAC.


   [...]



   +/* Finish later */
   +static const enum AVChannel usac_ch_pos_to_av[64] = {
   +    [0] = AV_CHAN_FRONT_LEFT,
   +    [1] = AV_CHAN_FRONT_RIGHT,
   +    [2] = AV_CHAN_FRONT_CENTER,
   +    [3] = AV_CHAN_LOW_FREQUENCY,
   +    [4] = AV_CHAN_BACK_LEFT, // unsure
   +    [5] = AV_CHAN_BACK_RIGHT, // unsure
   +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
   +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
   +    [8] = 0, /* rear surround left is missing */
   +    [9] = 0, /* rear surround right is missing */
   +    [10] = AV_CHAN_BACK_CENTER,
   +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
   +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
   +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
   +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
   +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
   +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
   +    [17] = AV_CHAN_TOP_FRONT_LEFT,
   +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
   +    [19] = AV_CHAN_TOP_FRONT_CENTER,
   +    [20] = AV_CHAN_TOP_BACK_LEFT,
   +    [21] = AV_CHAN_TOP_BACK_RIGHT,
   +    [22] = AV_CHAN_TOP_BACK_CENTER,
   +    [23] = AV_CHAN_TOP_SIDE_LEFT,
   +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
   +    [25] = AV_CHAN_TOP_CENTER,
   +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
   +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
   +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
   +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
   +    [30] = 0, /* top left surround is missing */
   +    [31] = 0, /* top right surround is missing */
   +};


   Some comment would be nice about the source of this table (which
  document,
   which table).

   It looks very similar to the ISO channel positions used in mov_chan. I
   think we follow this mapping in most cases:

   Left  Surround is SIDE_LEFT
   Right Surround is SIDE_RIGHT
   Rear Surround Left  is BACK_LEFT
   Rear Surround Right is BACK_RIGHT

   So in your table [4] and [5] should be SIDE, [8] and [9] should be
  BACK.
   [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.

   Yes, Left/Right Surround and Left/Right Side Surround will be the
 same,
   but those are not present in commonly used layouts at the same time.

   Regards,
   Marton
   ___
   ffmpeg-devel mailing list
   ffmpeg-devel@ffmpeg.org
   https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

   To unsubscribe, visit link above, or email
   ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


  Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:

  0 L left front FL front left
  1 R right front FR front right
  2 C center front FC front centre
  3 LFE low frequency enhancement LFE1 low frequency effects-1
  4 Ls left surround LS left surround
  5 Rs right surround RS right surround
  6 Lc left front center FLc front left centre
  7 Rc right front center FRc front right centre
  8 Lsr rear surround left BL back left
  9 Rsr rear surround right BR back right
  10 Cs rear center BC back centre
  11 Lsd left surround direct LSd left surround direct
  12 Rsd right surround direct RSd right surround direct
  13 Lss left side surround SL side left
  14 Rss right side surround SR side right
  15 Lw left wide front FLw front left wide
  16 Rw right wide front FRw front right wide
  17 Lv left front vertical height TpFL top front left
  18 Rv right front vertical height TpFR top front right
  19 Cv center front vertical height TpFC top front centre
  20 Lvr left surround vertical height rear TpBL top back left
  21 Rvr right surround vertical height rear TpBR top back right
  22 Cvr center vertical height rear TpBC top back centre
  23 Lvss left vertical height side surround TpSiL top side left
  24 Rvss right vertical height side surround TpSiR top side right
  25 Ts top center surround TpC top centre
  26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
  27 Lb left front vertical bottom BtFL bottom front left
  28 Rb right front vertical bottom BtFR bottom front right
  29 Cb center front vertical bottom BtFC bottom front centre
  30 Lvs left vertical height surround TpLS top left surround
  31 Rvs right vertical height surround TpRS top right surround

  Third field is "Loudspeaker position", last field is "Loudspeaker
  position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each
 prefixed
  with an abbreviation.

  I've added the source to the table comment in the code.

  I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked
  earlier.


  Thanks. Later in the code when you actually use this I can see that you
  are creating a native 

Re: [FFmpeg-devel] [PATCH v2 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

2024-05-21 Thread Lynne via ffmpeg-devel

On 21/05/2024 21:40, Marton Balint wrote:



On Tue, 21 May 2024, Lynne via ffmpeg-devel wrote:


On 21/05/2024 09:16, Marton Balint wrote:



 On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:


 On 19/05/2024 21:39, Marton Balint wrote:



  On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:


  This commit adds a decoder for the frequency-domain part of USAC.


  [...]



  +/* Finish later */
  +static const enum AVChannel usac_ch_pos_to_av[64] = {
  +    [0] = AV_CHAN_FRONT_LEFT,
  +    [1] = AV_CHAN_FRONT_RIGHT,
  +    [2] = AV_CHAN_FRONT_CENTER,
  +    [3] = AV_CHAN_LOW_FREQUENCY,
  +    [4] = AV_CHAN_BACK_LEFT, // unsure
  +    [5] = AV_CHAN_BACK_RIGHT, // unsure
  +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
  +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
  +    [8] = 0, /* rear surround left is missing */
  +    [9] = 0, /* rear surround right is missing */
  +    [10] = AV_CHAN_BACK_CENTER,
  +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
  +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
  +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
  +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
  +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
  +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
  +    [17] = AV_CHAN_TOP_FRONT_LEFT,
  +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
  +    [19] = AV_CHAN_TOP_FRONT_CENTER,
  +    [20] = AV_CHAN_TOP_BACK_LEFT,
  +    [21] = AV_CHAN_TOP_BACK_RIGHT,
  +    [22] = AV_CHAN_TOP_BACK_CENTER,
  +    [23] = AV_CHAN_TOP_SIDE_LEFT,
  +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
  +    [25] = AV_CHAN_TOP_CENTER,
  +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
  +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
  +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
  +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
  +    [30] = 0, /* top left surround is missing */
  +    [31] = 0, /* top right surround is missing */
  +};


  Some comment would be nice about the source of this table (which
 document,
  which table).

  It looks very similar to the ISO channel positions used in mov_chan. I
  think we follow this mapping in most cases:

  Left  Surround is SIDE_LEFT
  Right Surround is SIDE_RIGHT
  Rear Surround Left  is BACK_LEFT
  Rear Surround Right is BACK_RIGHT

  So in your table [4] and [5] should be SIDE, [8] and [9] should be
 BACK.
  [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.

  Yes, Left/Right Surround and Left/Right Side Surround will be the 
same,

  but those are not present in commonly used layouts at the same time.

  Regards,
  Marton
  ___
  ffmpeg-devel mailing list
  ffmpeg-devel@ffmpeg.org
  https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

  To unsubscribe, visit link above, or email
  ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


 Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:

 0 L left front FL front left
 1 R right front FR front right
 2 C center front FC front centre
 3 LFE low frequency enhancement LFE1 low frequency effects-1
 4 Ls left surround LS left surround
 5 Rs right surround RS right surround
 6 Lc left front center FLc front left centre
 7 Rc right front center FRc front right centre
 8 Lsr rear surround left BL back left
 9 Rsr rear surround right BR back right
 10 Cs rear center BC back centre
 11 Lsd left surround direct LSd left surround direct
 12 Rsd right surround direct RSd right surround direct
 13 Lss left side surround SL side left
 14 Rss right side surround SR side right
 15 Lw left wide front FLw front left wide
 16 Rw right wide front FRw front right wide
 17 Lv left front vertical height TpFL top front left
 18 Rv right front vertical height TpFR top front right
 19 Cv center front vertical height TpFC top front centre
 20 Lvr left surround vertical height rear TpBL top back left
 21 Rvr right surround vertical height rear TpBR top back right
 22 Cvr center vertical height rear TpBC top back centre
 23 Lvss left vertical height side surround TpSiL top side left
 24 Rvss right vertical height side surround TpSiR top side right
 25 Ts top center surround TpC top centre
 26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
 27 Lb left front vertical bottom BtFL bottom front left
 28 Rb right front vertical bottom BtFR bottom front right
 29 Cb center front vertical bottom BtFC bottom front centre
 30 Lvs left vertical height surround TpLS top left surround
 31 Rvs right vertical height surround TpRS top right surround

 Third field is "Loudspeaker position", last field is "Loudspeaker
 position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each 
prefixed

 with an abbreviation.

 I've added the source to the table comment in the code.

 I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked
 earlier.


 Thanks. Later in the code when you actually use this I can see that you
 are creating a native layout:

 +    channel_config_idx = get_bits(gb, 5); /* 
channelConfigurationIndex

 */
 +    if (!channel_config_idx) {
 +    /* UsacChannelConfig() */

Re: [FFmpeg-devel] [RFC] STF 2025

2024-05-21 Thread Rémi Denis-Courmont
Le tiistaina 21. toukokuuta 2024, 22.42.00 EEST Rémi Denis-Courmont a écrit :
> And "I hope you realise that you are arguing for" Intel, Loongson, etc.
> employees to stop reviewing patches.

P.S.: And FFlabs too, since it is a for-profit company.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [RFC] STF 2025

2024-05-21 Thread Rémi Denis-Courmont
Le tiistaina 21. toukokuuta 2024, 21.43.44 EEST Thilo Borgmann via ffmpeg-devel 
a écrit :
> >> Same as above about that we should and STF would.
> >> Especially since no corporate interest usually pays anyone for these
> >> tasks
> > 
> > Sadly true, but...
> > 
> >> (in case of reviews it might of course be considered a good thing).
> > 
> > I think some review is better than none. There may be conflict of
> > interests, but they are weighed by the risk of being caught abusing the
> > review process.
> I hope you realize what you argue in favor of.

Yes. It's quoted above.

Are you claiming that *no* review is better than *some* review done in 
*public* for all to see by a paid professional just because the person is 
maybe biased?

First, even volunteers have their own biases. Any expert should have opinions 
from their experience, and that by definition makes them "biased".

And second, you can't have it both ways. Either we want people to be paid for 
review, and they will be answerable to their sponsor, or we want people to 
continue to work on their free time.

STF is an agency of the German government, applying German government 
policies. They certainly do seem to have their own biases, including on tech, 
e.g.: https://www.theregister.com/2024/05/20/huawei_germany_ban/ to take just 
the most recent example to come to mind.

> Reviews need to be unbiased and independent.

Ideally so but that's the land of utopia.

> STF sponsoring reviews could be an excellent help towards this.

If STF is willing to sponsor reviews, that's welcome. But that would certainly 
not be "independent".

> Corporate influence on the review process already happened in the past
> and the chance of getting caught is almost zero.

So how do you know that it happened if it does not get caught?

And "I hope you realise that you are arguing for" Intel, Loongson, etc. 
employees to stop reviewing patches.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

2024-05-21 Thread Marton Balint



On Tue, 21 May 2024, Lynne via ffmpeg-devel wrote:


On 21/05/2024 09:16, Marton Balint wrote:



 On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:


 On 19/05/2024 21:39, Marton Balint wrote:



  On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:


  This commit adds a decoder for the frequency-domain part of USAC.


  [...]



  +/* Finish later */
  +static const enum AVChannel usac_ch_pos_to_av[64] = {
  +    [0] = AV_CHAN_FRONT_LEFT,
  +    [1] = AV_CHAN_FRONT_RIGHT,
  +    [2] = AV_CHAN_FRONT_CENTER,
  +    [3] = AV_CHAN_LOW_FREQUENCY,
  +    [4] = AV_CHAN_BACK_LEFT, // unsure
  +    [5] = AV_CHAN_BACK_RIGHT, // unsure
  +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
  +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
  +    [8] = 0, /* rear surround left is missing */
  +    [9] = 0, /* rear surround right is missing */
  +    [10] = AV_CHAN_BACK_CENTER,
  +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
  +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
  +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
  +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
  +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
  +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
  +    [17] = AV_CHAN_TOP_FRONT_LEFT,
  +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
  +    [19] = AV_CHAN_TOP_FRONT_CENTER,
  +    [20] = AV_CHAN_TOP_BACK_LEFT,
  +    [21] = AV_CHAN_TOP_BACK_RIGHT,
  +    [22] = AV_CHAN_TOP_BACK_CENTER,
  +    [23] = AV_CHAN_TOP_SIDE_LEFT,
  +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
  +    [25] = AV_CHAN_TOP_CENTER,
  +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
  +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
  +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
  +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
  +    [30] = 0, /* top left surround is missing */
  +    [31] = 0, /* top right surround is missing */
  +};


  Some comment would be nice about the source of this table (which
 document,
  which table).

  It looks very similar to the ISO channel positions used in mov_chan. I
  think we follow this mapping in most cases:

  Left  Surround is SIDE_LEFT
  Right Surround is SIDE_RIGHT
  Rear Surround Left  is BACK_LEFT
  Rear Surround Right is BACK_RIGHT

  So in your table [4] and [5] should be SIDE, [8] and [9] should be
 BACK.
  [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.

  Yes, Left/Right Surround and Left/Right Side Surround will be the same,
  but those are not present in commonly used layouts at the same time.

  Regards,
  Marton
  ___
  ffmpeg-devel mailing list
  ffmpeg-devel@ffmpeg.org
  https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

  To unsubscribe, visit link above, or email
  ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


 Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:

 0 L left front FL front left
 1 R right front FR front right
 2 C center front FC front centre
 3 LFE low frequency enhancement LFE1 low frequency effects-1
 4 Ls left surround LS left surround
 5 Rs right surround RS right surround
 6 Lc left front center FLc front left centre
 7 Rc right front center FRc front right centre
 8 Lsr rear surround left BL back left
 9 Rsr rear surround right BR back right
 10 Cs rear center BC back centre
 11 Lsd left surround direct LSd left surround direct
 12 Rsd right surround direct RSd right surround direct
 13 Lss left side surround SL side left
 14 Rss right side surround SR side right
 15 Lw left wide front FLw front left wide
 16 Rw right wide front FRw front right wide
 17 Lv left front vertical height TpFL top front left
 18 Rv right front vertical height TpFR top front right
 19 Cv center front vertical height TpFC top front centre
 20 Lvr left surround vertical height rear TpBL top back left
 21 Rvr right surround vertical height rear TpBR top back right
 22 Cvr center vertical height rear TpBC top back centre
 23 Lvss left vertical height side surround TpSiL top side left
 24 Rvss right vertical height side surround TpSiR top side right
 25 Ts top center surround TpC top centre
 26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
 27 Lb left front vertical bottom BtFL bottom front left
 28 Rb right front vertical bottom BtFR bottom front right
 29 Cb center front vertical bottom BtFC bottom front centre
 30 Lvs left vertical height surround TpLS top left surround
 31 Rvs right vertical height surround TpRS top right surround

 Third field is "Loudspeaker position", last field is "Loudspeaker
 position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each prefixed
 with an abbreviation.

 I've added the source to the table comment in the code.

 I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked
 earlier.


 Thanks. Later in the code when you actually use this I can see that you
 are creating a native layout:


 +    channel_config_idx = get_bits(gb, 5); /* channelConfigurationIndex
 */
 +    if (!channel_config_idx) {
 +    /* UsacChannelConfig() */
 +    uint8_t channel_pos[64];
 +    uint8

[FFmpeg-devel] [PATCH] avfilter/framesync: fix forward EOF pts

2024-05-21 Thread Nicolas Gaullier
Note1: when the EOF pts is not accurate enough, the last frame
can be dropped by vf_fps with default rounding.

Note2: vf_scale uses framesync since e82a3997cdd6c0894869b33ba42430ac3,
so this is a very commonplace scenario.

For example:
./ffprobe -f lavfi testsrc=d=1,scale,fps -of flat \
  -count_frames -show_entries stream=nb_read_frames

Before:
streams.stream.0.nb_read_frames="24"

After:
streams.stream.0.nb_read_frames="25"
---
 libavfilter/framesync.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/libavfilter/framesync.c b/libavfilter/framesync.c
index 535fbe9c7c..28a992ba6d 100644
--- a/libavfilter/framesync.c
+++ b/libavfilter/framesync.c
@@ -103,14 +103,14 @@ int ff_framesync_init(FFFrameSync *fs, AVFilterContext 
*parent, unsigned nb_in)
 return 0;
 }
 
-static void framesync_eof(FFFrameSync *fs)
+static void framesync_eof(FFFrameSync *fs, int64_t pts)
 {
 fs->eof = 1;
 fs->frame_ready = 0;
-ff_outlink_set_status(fs->parent->outputs[0], AVERROR_EOF, AV_NOPTS_VALUE);
+ff_outlink_set_status(fs->parent->outputs[0], AVERROR_EOF, pts);
 }
 
-static void framesync_sync_level_update(FFFrameSync *fs)
+static void framesync_sync_level_update(FFFrameSync *fs, int64_t eof_pts)
 {
 unsigned i, level = 0;
 
@@ -131,7 +131,7 @@ static void framesync_sync_level_update(FFFrameSync *fs)
 if (level)
 fs->sync_level = level;
 else
-framesync_eof(fs);
+framesync_eof(fs, eof_pts);
 }
 
 int ff_framesync_configure(FFFrameSync *fs)
@@ -179,7 +179,7 @@ int ff_framesync_configure(FFFrameSync *fs)
 for (i = 0; i < fs->nb_in; i++)
 fs->in[i].pts = fs->in[i].pts_next = AV_NOPTS_VALUE;
 fs->sync_level = UINT_MAX;
-framesync_sync_level_update(fs);
+framesync_sync_level_update(fs, AV_NOPTS_VALUE);
 
 return 0;
 }
@@ -200,7 +200,7 @@ static int framesync_advance(FFFrameSync *fs)
 if (fs->in[i].have_next && fs->in[i].pts_next < pts)
 pts = fs->in[i].pts_next;
 if (pts == INT64_MAX) {
-framesync_eof(fs);
+framesync_eof(fs, AV_NOPTS_VALUE);
 break;
 }
 for (i = 0; i < fs->nb_in; i++) {
@@ -222,7 +222,7 @@ static int framesync_advance(FFFrameSync *fs)
 fs->frame_ready = 1;
 if (fs->in[i].state == STATE_EOF &&
 fs->in[i].after == EXT_STOP)
-framesync_eof(fs);
+framesync_eof(fs, AV_NOPTS_VALUE);
 }
 }
 if (fs->frame_ready)
@@ -255,15 +255,14 @@ static void framesync_inject_frame(FFFrameSync *fs, 
unsigned in, AVFrame *frame)
 fs->in[in].have_next  = 1;
 }
 
-static void framesync_inject_status(FFFrameSync *fs, unsigned in, int status, 
int64_t pts)
+static void framesync_inject_status(FFFrameSync *fs, unsigned in, int status, 
int64_t eof_pts)
 {
 av_assert0(!fs->in[in].have_next);
-pts = fs->in[in].state != STATE_RUN || fs->in[in].after == EXT_INFINITY
-? INT64_MAX : framesync_pts_extrapolate(fs, in, fs->in[in].pts);
 fs->in[in].sync = 0;
-framesync_sync_level_update(fs);
+framesync_sync_level_update(fs, status == AVERROR_EOF ? eof_pts : 
AV_NOPTS_VALUE);
 fs->in[in].frame_next = NULL;
-fs->in[in].pts_next   = pts;
+fs->in[in].pts_next   = fs->in[in].state != STATE_RUN || fs->in[in].after 
== EXT_INFINITY
+? INT64_MAX : framesync_pts_extrapolate(fs, in, 
fs->in[in].pts);
 fs->in[in].have_next  = 1;
 }
 
-- 
2.30.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] lavc/vvc_mc: R-V V avg w_avg

2024-05-21 Thread flow gg
Reordered some here.

 于2024年5月22日周三 03:24写道:

> From: sunyuechi 
>
>   C908   X60
> avg_8_2x2_c:1.01.0
> avg_8_2x2_rvv_i32  :0.70.7
> avg_8_2x4_c:2.02.0
> avg_8_2x4_rvv_i32  :1.00.7
> avg_8_2x8_c:4.03.7
> avg_8_2x8_rvv_i32  :1.51.2
> avg_8_2x16_c   :7.57.7
> avg_8_2x16_rvv_i32 :2.72.5
> avg_8_2x32_c   :   14.2   15.0
> avg_8_2x32_rvv_i32 :5.04.5
> avg_8_2x64_c   :   28.5   30.2
> avg_8_2x64_rvv_i32 :9.58.7
> avg_8_2x128_c  :   80.0   70.5
> avg_8_2x128_rvv_i32:   50.7   41.2
> avg_8_4x2_c:1.72.0
> avg_8_4x2_rvv_i32  :0.70.7
> avg_8_4x4_c:3.53.7
> avg_8_4x4_rvv_i32  :1.21.0
> avg_8_4x8_c:6.77.0
> avg_8_4x8_rvv_i32  :1.51.2
> avg_8_4x16_c   :   13.2   14.0
> avg_8_4x16_rvv_i32 :2.72.5
> avg_8_4x32_c   :   26.2   27.7
> avg_8_4x32_rvv_i32 :5.04.5
> avg_8_4x64_c   :   52.2   55.0
> avg_8_4x64_rvv_i32 :9.58.7
> avg_8_4x128_c  :  146.0  117.5
> avg_8_4x128_rvv_i32:   53.2   40.5
> avg_8_8x2_c:3.53.5
> avg_8_8x2_rvv_i32  :0.70.7
> avg_8_8x4_c:6.56.5
> avg_8_8x4_rvv_i32  :1.21.0
> avg_8_8x8_c:   12.7   13.2
> avg_8_8x8_rvv_i32  :2.01.5
> avg_8_8x16_c   :   25.2   26.2
> avg_8_8x16_rvv_i32 :3.52.5
> avg_8_8x32_c   :   50.0   52.7
> avg_8_8x32_rvv_i32 :6.54.7
> avg_8_8x64_c   :   99.7  105.0
> avg_8_8x64_rvv_i32 :   12.58.5
> avg_8_8x128_c  :  225.7  218.0
> avg_8_8x128_rvv_i32:   78.0   39.2
> avg_8_16x2_c   :6.26.7
> avg_8_16x2_rvv_i32 :1.20.7
> avg_8_16x4_c   :   12.2   12.7
> avg_8_16x4_rvv_i32 :2.01.2
> avg_8_16x8_c   :   24.7   26.0
> avg_8_16x8_rvv_i32 :3.51.7
> avg_8_16x16_c  :   49.0   51.5
> avg_8_16x16_rvv_i32:6.23.2
> avg_8_16x32_c  :   97.5  102.5
> avg_8_16x32_rvv_i32:   11.55.7
> avg_8_16x64_c  :  212.5  204.7
> avg_8_16x64_rvv_i32:   22.5   11.0
> avg_8_16x128_c :  411.2  418.2
> avg_8_16x128_rvv_i32   :   76.0   47.7
> avg_8_32x2_c   :   12.2   12.7
> avg_8_32x2_rvv_i32 :2.01.2
> avg_8_32x4_c   :   24.2   25.5
> avg_8_32x4_rvv_i32 :3.21.7
> avg_8_32x8_c   :   48.5   50.7
> avg_8_32x8_rvv_i32 :5.73.2
> avg_8_32x16_c  :   96.5  101.2
> avg_8_32x16_rvv_i32:   10.75.7
> avg_8_32x32_c  :  192.5  202.5
> avg_8_32x32_rvv_i32:   20.7   10.5
> avg_8_32x64_c  :  411.2  404.5
> avg_8_32x64_rvv_i32:   41.0   20.5
> avg_8_32x128_c :  834.7  855.2
> avg_8_32x128_rvv_i32   :  151.2  118.7
> avg_8_64x2_c

[FFmpeg-devel] [PATCH] lavc/vvc_mc: R-V V avg w_avg

2024-05-21 Thread uk7b
From: sunyuechi 

  C908   X60
avg_8_2x2_c:1.01.0
avg_8_2x2_rvv_i32  :0.70.7
avg_8_2x4_c:2.02.0
avg_8_2x4_rvv_i32  :1.00.7
avg_8_2x8_c:4.03.7
avg_8_2x8_rvv_i32  :1.51.2
avg_8_2x16_c   :7.57.7
avg_8_2x16_rvv_i32 :2.72.5
avg_8_2x32_c   :   14.2   15.0
avg_8_2x32_rvv_i32 :5.04.5
avg_8_2x64_c   :   28.5   30.2
avg_8_2x64_rvv_i32 :9.58.7
avg_8_2x128_c  :   80.0   70.5
avg_8_2x128_rvv_i32:   50.7   41.2
avg_8_4x2_c:1.72.0
avg_8_4x2_rvv_i32  :0.70.7
avg_8_4x4_c:3.53.7
avg_8_4x4_rvv_i32  :1.21.0
avg_8_4x8_c:6.77.0
avg_8_4x8_rvv_i32  :1.51.2
avg_8_4x16_c   :   13.2   14.0
avg_8_4x16_rvv_i32 :2.72.5
avg_8_4x32_c   :   26.2   27.7
avg_8_4x32_rvv_i32 :5.04.5
avg_8_4x64_c   :   52.2   55.0
avg_8_4x64_rvv_i32 :9.58.7
avg_8_4x128_c  :  146.0  117.5
avg_8_4x128_rvv_i32:   53.2   40.5
avg_8_8x2_c:3.53.5
avg_8_8x2_rvv_i32  :0.70.7
avg_8_8x4_c:6.56.5
avg_8_8x4_rvv_i32  :1.21.0
avg_8_8x8_c:   12.7   13.2
avg_8_8x8_rvv_i32  :2.01.5
avg_8_8x16_c   :   25.2   26.2
avg_8_8x16_rvv_i32 :3.52.5
avg_8_8x32_c   :   50.0   52.7
avg_8_8x32_rvv_i32 :6.54.7
avg_8_8x64_c   :   99.7  105.0
avg_8_8x64_rvv_i32 :   12.58.5
avg_8_8x128_c  :  225.7  218.0
avg_8_8x128_rvv_i32:   78.0   39.2
avg_8_16x2_c   :6.26.7
avg_8_16x2_rvv_i32 :1.20.7
avg_8_16x4_c   :   12.2   12.7
avg_8_16x4_rvv_i32 :2.01.2
avg_8_16x8_c   :   24.7   26.0
avg_8_16x8_rvv_i32 :3.51.7
avg_8_16x16_c  :   49.0   51.5
avg_8_16x16_rvv_i32:6.23.2
avg_8_16x32_c  :   97.5  102.5
avg_8_16x32_rvv_i32:   11.55.7
avg_8_16x64_c  :  212.5  204.7
avg_8_16x64_rvv_i32:   22.5   11.0
avg_8_16x128_c :  411.2  418.2
avg_8_16x128_rvv_i32   :   76.0   47.7
avg_8_32x2_c   :   12.2   12.7
avg_8_32x2_rvv_i32 :2.01.2
avg_8_32x4_c   :   24.2   25.5
avg_8_32x4_rvv_i32 :3.21.7
avg_8_32x8_c   :   48.5   50.7
avg_8_32x8_rvv_i32 :5.73.2
avg_8_32x16_c  :   96.5  101.2
avg_8_32x16_rvv_i32:   10.75.7
avg_8_32x32_c  :  192.5  202.5
avg_8_32x32_rvv_i32:   20.7   10.5
avg_8_32x64_c  :  411.2  404.5
avg_8_32x64_rvv_i32:   41.0   20.5
avg_8_32x128_c :  834.7  855.2
avg_8_32x128_rvv_i32   :  151.2  118.7
avg_8_64x2_c   :   24.0   25.2
avg_8_64x2_rvv_i32 :3.21.7
avg_8_64x4_c   :   48.2   50.5
avg_8_64x4_rvv_i32   

Re: [FFmpeg-devel] FFmpeg 7.0.1

2024-05-21 Thread Tristan Matthews
On Thu, Apr 4, 2024 at 8:23 PM Michael Niedermayer
 wrote:
>
> Hi all
>
> I intend to make a 7.0.1 in a few weeks for all the bug fixes that didn't
> make it in 7.0

Any update on this?

-t
>
> thx
>
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> It is dangerous to be right in matters on which the established authorities
> are wrong. -- Voltaire
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 01/13] avformat/flvenc: Implement support for multi-track video

2024-05-21 Thread Cosmin Stejerean via ffmpeg-devel


> On May 21, 2024, at 11:54 AM, Cosmin Stejerean via ffmpeg-devel 
>  wrote:
> 
> 
> However I've found a workaround. By setting my git send-email from to just 
> "cos...@cosmin.at " rather than "Cosmin Stejerean 
> mailto:cos...@cosmin.at>>" then the emails that go out 
> will have the "From: " as the first line of the body, so the patches apply 
> properly. You can see my most recent patch for example.
> 

The email client is trying to be too helpful here adding extra formatting to 
the message body. The .gitconfig should look something like this, using your 
email address without <> and without the name.

from = cos...@cosmin.at

That works in my testing.

- Cosmin



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 01/13] avformat/flvenc: Implement support for multi-track video

2024-05-21 Thread Cosmin Stejerean via ffmpeg-devel


> On May 21, 2024, at 11:54 AM, Cosmin Stejerean via ffmpeg-devel 
>  wrote:
> 
> 
> However I've found a workaround. By setting my git send-email from to just 
> "cos...@cosmin.at " rather than "Cosmin Stejerean 
> mailto:cos...@cosmin.at>>" then the emails that go out 
> will have the "From: " as the first line of the body, so the patches apply 
> properly. You can see my most recent patch for example.
> 

The email client is trying to be too helpful here adding extra formatting to 
the message body. The .gitconfig should look something like this, using your 
email address without <> and without the name.

from = cos...@cosmin.at

That works in my testing.

- Cosmin



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 01/13] avformat/flvenc: Implement support for multi-track video

2024-05-21 Thread Cosmin Stejerean via ffmpeg-devel


> On May 21, 2024, at 11:48 AM, Dennis Sädtler via ffmpeg-devel 
>  wrote:
> 
>> From: Dennis Sädtler via ffmpeg-devel 
> 
> I wonder what happened here, did I make a mistake when submitting the
> original patch to the ML so the actual commit author name/email got
> lost?
> 
> Should be the same as the signed-off section based on the repo I
> submitted it from:
> https://github.com/derrod/ffmpeg/commit/25f1700cffa00fcd04bcc27efce077a93e7f5142
> 

The problem is the mailing list rewrites the message (to for example add the 
footer at the bottom). It cannot do that and keep the from email address if you 
have email security configured properly for the domain (as it would get 
rejected). 

So the mailing list changes the from to "<name> via ffmpeg-devel 
<ffmpeg-devel@ffmpeg.org>". Furthermore git send-email won't include "From:" in 
the patch body if the commit authorship matches the from email address on git 
send-email. 

However I've found a workaround. By setting my git send-email from to just 
"cos...@cosmin.at" rather than "Cosmin Stejerean <cos...@cosmin.at>" then the 
emails that go out will have the "From: " as the first line of the body, so the 
patches apply properly. You can see my most recent patch for example.

You can add an entry to .mailmap to handle any previous commits that have been 
pushed using the "via ffmpeg-devel" rewrite, see my entry for example.


- Cosmin




___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 01/13] avformat/flvenc: Implement support for multi-track video

2024-05-21 Thread Timo Rothenpieler

On 21.05.2024 20:48, Dennis Sädtler wrote:

From: Dennis Sädtler via ffmpeg-devel 


I wonder what happened here, did I make a mistake when submitting the
original patch to the ML so the actual commit author name/email got
lost?


That's just what happens if your mailserver has strict SPF/DMARC configured.
Since no other server can send mails from its domain then, Mailing-Lists 
are forced to mangle it.



Should be the same as the signed-off section based on the repo I
submitted it from:
https://github.com/derrod/ffmpeg/commit/25f1700cffa00fcd04bcc27efce077a93e7f5142


I'll fix it locally, hadn't noticed that yet.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 01/13] avformat/flvenc: Implement support for multi-track video

2024-05-21 Thread Dennis Sädtler via ffmpeg-devel
> From: Dennis Sädtler via ffmpeg-devel 

I wonder what happened here, did I make a mistake when submitting the
original patch to the ML so the actual commit author name/email got
lost?

Should be the same as the signed-off section based on the repo I
submitted it from:
https://github.com/derrod/ffmpeg/commit/25f1700cffa00fcd04bcc27efce077a93e7f5142

Cheers,
Dennis
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [RFC] STF 2025

2024-05-21 Thread Thilo Borgmann via ffmpeg-devel



On 20.05.24 20:51, Rémi Denis-Courmont wrote:

Le sunnuntaina 19. toukokuuta 2024, 14.29.43 EEST Thilo Borgmann via ffmpeg-
devel a écrit :

[...]


* Fund administrative / maintenance work (one example is the mailman
upgrade that is needed
   with the next OS upgrade on one of our servers (this is not as trivial
   as one might expect)). Another example here may be some git related
   tools if we find something that there's a broad consensus about.


I agree that this should be paid but I would expect that STF would not be
too keen on it, not that I'd know really.

We should absolutely pay for such activity and STF is very well willing
to fund such things.


Again, I don't know but that seems to stray from their stated goals. Also this
is most certainly not a full-time job, and it requires a very high level of
trust. In practice, what this really means is paying Michael.

It is more of a question whether STF is willing to pay for this, and whether a
reasonable task description with a reasonable average prorated workload and a
pay can be defined.


Again, I do know. "...STF is very well willing to fund such things." 
does not sound like an assumption to me.




And again, it is completely reasonable to be paid for that, and also for
code reviews and writing test cases (if we want to complete the menial
task list), but I am perplexed as to STF's stance on that.



Same as above about that we should and STF would.
Especially since no corporate interest usually pays anyone for these tasks


Sadly true, but...


(in case of reviews it might of course be considered a good thing).


I think some review is better than none. There may be conflict of interests,
but they are weighed by the risk of being caught abusing the review process.


I hope you realize what you argue in favor of. Reviews need to be 
unbiased and independent. STF sponsoring reviews could be an excellent 
help towards this.


Corporate influence on the review process already happened in the past 
and the chance of getting caught is almost zero.


About the rest, I think you already said that you don't find funding 
non-full-time positions useful in another thread - no need to reiterate 
that I don't agree with that nor with your assumptions that should lead 
to that.


-Thilo
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] avcodec/dovi - correctly read el_bit_depth_minus8 and ext_mapping_idc

2024-05-21 Thread Cosmin Stejerean via ffmpeg-devel
From: Cosmin Stejerean 

These two fields are coded together into a single 16 bit integer with upper 8
bits for ext_mapping_idc and lower 8 bits for el_bit_depth_minus8.

Furthermore ext_mapping_idc has two components, upper 3 bits and lower 5 bits.

---
 libavcodec/dovi_rpudec.c | 7 ++-
 libavcodec/dovi_rpuenc.c | 4 +++-
 libavutil/dovi_meta.h| 2 ++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/libavcodec/dovi_rpudec.c b/libavcodec/dovi_rpudec.c
index 7c7eda9d09..af41ab5827 100644
--- a/libavcodec/dovi_rpudec.c
+++ b/libavcodec/dovi_rpudec.c
@@ -411,13 +411,18 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, 
size_t rpu_size,
 
 if ((hdr->rpu_format & 0x700) == 0) {
 int bl_bit_depth_minus8 = get_ue_golomb_31(gb);
-int el_bit_depth_minus8 = get_ue_golomb_31(gb);
+int el_bit_depth_minus8_and_ext_mapping_idc = 
get_ue_golomb_long(gb);
+int el_bit_depth_minus8 = el_bit_depth_minus8_and_ext_mapping_idc 
& 0xFF; // lowest 8 bits
+int ext_mapping_idc = (el_bit_depth_minus8_and_ext_mapping_idc & 
0xFF00) >> 8; // upper 8 bits
+
 int vdr_bit_depth_minus8 = get_ue_golomb_31(gb);
 VALIDATE(bl_bit_depth_minus8, 0, 8);
 VALIDATE(el_bit_depth_minus8, 0, 8);
 VALIDATE(vdr_bit_depth_minus8, 0, 8);
 hdr->bl_bit_depth = bl_bit_depth_minus8 + 8;
 hdr->el_bit_depth = el_bit_depth_minus8 + 8;
+hdr->ext_mapping_idc_0_4 = ext_mapping_idc & 0x1F; // lowest 5 
bits of ext_mapping_idc
+hdr->ext_mapping_idc_5_7 = (ext_mapping_idc & 0xE0) >> 5; // upper 
3 bits of ext_mapping_idc
 hdr->vdr_bit_depth = vdr_bit_depth_minus8 + 8;
 hdr->spatial_resampling_filter_flag = get_bits1(gb);
 skip_bits(gb, 3); /* reserved_zero_3bits */
diff --git a/libavcodec/dovi_rpuenc.c b/libavcodec/dovi_rpuenc.c
index 3c3e0f84c0..91c0a85050 100644
--- a/libavcodec/dovi_rpuenc.c
+++ b/libavcodec/dovi_rpuenc.c
@@ -444,6 +444,7 @@ int ff_dovi_rpu_generate(DOVIContext *s, const 
AVDOVIMetadata *metadata,
 int vdr_dm_metadata_changed, vdr_rpu_id, use_prev_vdr_rpu, profile,
 buffer_size, rpu_size, pad, zero_run;
 int num_ext_blocks_v1, num_ext_blocks_v2;
+uint8_t ext_mapping_idc;
 uint32_t crc;
 uint8_t *dst;
 if (!metadata) {
@@ -551,7 +552,8 @@ int ff_dovi_rpu_generate(DOVIContext *s, const 
AVDOVIMetadata *metadata,
 put_bits(pb, 1, hdr->bl_video_full_range_flag);
 if ((hdr->rpu_format & 0x700) == 0) {
 set_ue_golomb(pb, hdr->bl_bit_depth - 8);
-set_ue_golomb(pb, hdr->el_bit_depth - 8);
+ext_mapping_idc = (hdr->ext_mapping_idc_5_7 << 5) | 
hdr->ext_mapping_idc_0_4;
+set_ue_golomb(pb, (ext_mapping_idc << 8) | hdr->el_bit_depth - 8);
 set_ue_golomb(pb, hdr->vdr_bit_depth - 8);
 put_bits(pb, 1, hdr->spatial_resampling_filter_flag);
 put_bits(pb, 3, 0); /* reserved_zero_3bits */
diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
index e10332f8d7..d01bfe19fe 100644
--- a/libavutil/dovi_meta.h
+++ b/libavutil/dovi_meta.h
@@ -87,6 +87,8 @@ typedef struct AVDOVIRpuDataHeader {
 uint8_t bl_video_full_range_flag;
 uint8_t bl_bit_depth; /* [8, 16] */
 uint8_t el_bit_depth; /* [8, 16] */
+uint8_t ext_mapping_idc_0_4; /* extended base layer inverse mapping 
indicator */
+uint8_t ext_mapping_idc_5_7; /* reserved */
 uint8_t vdr_bit_depth; /* [8, 16] */
 uint8_t spatial_resampling_filter_flag;
 uint8_t el_spatial_resampling_filter_flag;
-- 
2.42.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2 2/5] lavc/vp9dsp: R-V V mc bilin h v

2024-05-21 Thread flow gg
Do macro definitions also need commas? I noticed that much of my old code
and SiFive's code doesn't use commas there.

Rémi Denis-Courmont  于2024年5月22日周三 02:29写道:

> Le tiistaina 21. toukokuuta 2024, 20.13.16 EEST u...@foxmail.com a écrit :
> > From: sunyuechi 
>
> > diff --git a/libavcodec/riscv/vp9_mc_rvv.S
> b/libavcodec/riscv/vp9_mc_rvv.S
> > index 7cb38ec94a..739380d9a9 100644
> > --- a/libavcodec/riscv/vp9_mc_rvv.S
> > +++ b/libavcodec/riscv/vp9_mc_rvv.S
> > @@ -53,6 +53,49 @@ func ff_vp9_avg\len\()_rvv, zve32x
> >  endfunc
> >  .endm
> >
> > +.macro bilin_load dst len op type mn
>
> Commas, please.
>
> --
> Rémi Denis-Courmont
> http://www.remlab.net/
>
>
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2 2/5] lavc/vp9dsp: R-V V mc bilin h v

2024-05-21 Thread Rémi Denis-Courmont
Le tiistaina 21. toukokuuta 2024, 20.13.16 EEST u...@foxmail.com a écrit :
> From: sunyuechi 

> diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
> index 7cb38ec94a..739380d9a9 100644
> --- a/libavcodec/riscv/vp9_mc_rvv.S
> +++ b/libavcodec/riscv/vp9_mc_rvv.S
> @@ -53,6 +53,49 @@ func ff_vp9_avg\len\()_rvv, zve32x
>  endfunc
>  .endm
> 
> +.macro bilin_load dst len op type mn

Commas, please.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

2024-05-21 Thread Lynne via ffmpeg-devel

On 21/05/2024 09:16, Marton Balint wrote:



On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:


On 19/05/2024 21:39, Marton Balint wrote:



 On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:


 This commit adds a decoder for the frequency-domain part of USAC.


 [...]



 +/* Finish later */
 +static const enum AVChannel usac_ch_pos_to_av[64] = {
 +    [0] = AV_CHAN_FRONT_LEFT,
 +    [1] = AV_CHAN_FRONT_RIGHT,
 +    [2] = AV_CHAN_FRONT_CENTER,
 +    [3] = AV_CHAN_LOW_FREQUENCY,
 +    [4] = AV_CHAN_BACK_LEFT, // unsure
 +    [5] = AV_CHAN_BACK_RIGHT, // unsure
 +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
 +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
 +    [8] = 0, /* rear surround left is missing */
 +    [9] = 0, /* rear surround right is missing */
 +    [10] = AV_CHAN_BACK_CENTER,
 +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
 +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
 +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
 +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
 +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
 +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
 +    [17] = AV_CHAN_TOP_FRONT_LEFT,
 +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
 +    [19] = AV_CHAN_TOP_FRONT_CENTER,
 +    [20] = AV_CHAN_TOP_BACK_LEFT,
 +    [21] = AV_CHAN_TOP_BACK_RIGHT,
 +    [22] = AV_CHAN_TOP_BACK_CENTER,
 +    [23] = AV_CHAN_TOP_SIDE_LEFT,
 +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
 +    [25] = AV_CHAN_TOP_CENTER,
 +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
 +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
 +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
 +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
 +    [30] = 0, /* top left surround is missing */
 +    [31] = 0, /* top right surround is missing */
 +};


 Some comment would be nice about the source of this table (which document,
 which table).

 It looks very similar to the ISO channel positions used in mov_chan. I
 think we follow this mapping in most cases:

 Left  Surround is SIDE_LEFT
 Right Surround is SIDE_RIGHT
 Rear Surround Left  is BACK_LEFT
 Rear Surround Right is BACK_RIGHT

 So in your table [4] and [5] should be SIDE, and [8] and [9] should be BACK.

 [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.

 Yes, Left/Right Surround and Left/Right Side Surround will be the same,
 but those are not present in commonly used layouts at the same time.

 Regards,
 Marton
 ___
 ffmpeg-devel mailing list
 ffmpeg-devel@ffmpeg.org
 https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

 To unsubscribe, visit link above, or email
 ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:

0 L left front FL front left
1 R right front FR front right
2 C center front FC front centre
3 LFE low frequency enhancement LFE1 low frequency effects-1
4 Ls left surround LS left surround
5 Rs right surround RS right surround
6 Lc left front center FLc front left centre
7 Rc right front center FRc front right centre
8 Lsr rear surround left BL back left
9 Rsr rear surround right BR back right
10 Cs rear center BC back centre
11 Lsd left surround direct LSd left surround direct
12 Rsd right surround direct RSd right surround direct
13 Lss left side surround SL side left
14 Rss right side surround SR side right
15 Lw left wide front FLw front left wide
16 Rw right wide front FRw front right wide
17 Lv left front vertical height TpFL top front left
18 Rv right front vertical height TpFR top front right
19 Cv center front vertical height TpFC top front centre
20 Lvr left surround vertical height rear TpBL top back left
21 Rvr right surround vertical height rear TpBR top back right
22 Cvr center vertical height rear TpBC top back centre
23 Lvss left vertical height side surround TpSiL top side left
24 Rvss right vertical height side surround TpSiR top side right
25 Ts top center surround TpC top centre
26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
27 Lb left front vertical bottom BtFL bottom front left
28 Rb right front vertical bottom BtFR bottom front right
29 Cb center front vertical bottom BtFC bottom front centre
30 Lvs left vertical height surround TpLS top left surround
31 Rvs right vertical height surround TpRS top right surround

Third field is "Loudspeaker position", last field is "Loudspeaker
position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each 
prefixed with an abbreviation.


I've added the source to the table comment in the code.

I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked 
earlier.


Thanks. Later in the code when you actually use this I can see that you 
are creating a native layout:


+    channel_config_idx = get_bits(gb, 5); /* 
channelConfigurationIndex */

+    if (!channel_config_idx) {
+    /* UsacChannelConfig() */
+    uint8_t channel_pos[64];
+    uint8_t nb_channels = get_escaped_value(gb, 5, 8, 16); /* 
numOutChannels */

+    if (nb_channels >= 64)
+    return AVERROR(EINVAL);
+
+    a

[FFmpeg-devel] [PATCH 1/2] checkasm/riscv: test misaligned before V

2024-05-21 Thread Rémi Denis-Courmont
Otherwise V functions mask scalar misaligned ones.
---
 tests/checkasm/checkasm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 31ca9f6e2b..76835ab267 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -278,6 +278,7 @@ static const struct {
 { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
 #elif ARCH_RISCV
 { "RVI",  "rvi",  AV_CPU_FLAG_RVI },
+{ "misaligned", "misaligned", AV_CPU_FLAG_RV_MISALIGNED },
 { "RVF",  "rvf",  AV_CPU_FLAG_RVF },
 { "RVD",  "rvd",  AV_CPU_FLAG_RVD },
 { "RVBaddr",  "rvb_a",AV_CPU_FLAG_RVB_ADDR },
@@ -287,7 +288,6 @@ static const struct {
 { "RVVi64",   "rvv_i64",  AV_CPU_FLAG_RVV_I64 },
 { "RVVf64",   "rvv_f64",  AV_CPU_FLAG_RVV_F64 },
 { "RV_Zvbb",  "rv_zvbb",  AV_CPU_FLAG_RV_ZVBB },
-{ "misaligned", "misaligned", AV_CPU_FLAG_RV_MISALIGNED },
 #elif ARCH_MIPS
 { "MMI",  "mmi",  AV_CPU_FLAG_MMI },
 { "MSA",  "msa",  AV_CPU_FLAG_MSA },
-- 
2.45.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/2] lavc/pixblockdsp: add scalar get_pixels_unaligned

2024-05-21 Thread Rémi Denis-Courmont
The code is already there, we just need to use it.

get_pixels_unaligned_c: 2.2
get_pixels_unaligned_misaligned: 1.7
---
 libavcodec/riscv/pixblockdsp_init.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/libavcodec/riscv/pixblockdsp_init.c 
b/libavcodec/riscv/pixblockdsp_init.c
index b205841101..2735776105 100644
--- a/libavcodec/riscv/pixblockdsp_init.c
+++ b/libavcodec/riscv/pixblockdsp_init.c
@@ -55,6 +55,13 @@ av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
 c->get_pixels = ff_get_pixels_8_rvi;
 }
 
+if (cpu_flags & AV_CPU_FLAG_RV_MISALIGNED) {
+if (high_bit_depth)
+c->get_pixels_unaligned = ff_get_pixels_16_rvi;
+else
+c->get_pixels_unaligned = ff_get_pixels_8_rvi;
+}
+
 #if HAVE_RVV
 if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
 c->diff_pixels = ff_diff_pixels_unaligned_rvv;
-- 
2.45.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] lavc/vvc_mc: R-V V avg w_avg

2024-05-21 Thread flow gg
> I would expect that you can get better performance by interleaving scalar
> and vector stuff, and possibly also vector loads and vector arithmetic.

Okay, I will try

> These labels lead to nowhere? If you actually mean to implicitly fall through
> to the next function, you can use the function name directly rather than add
> odd labels.

These labels are used to convert variable parameters to constants to
achieve better performance and prepare for the next .irp. Some names are
strange because they cannot be duplicated. Here, there is only one
function, which should be executed after going through these labels?

Rémi Denis-Courmont  于2024年5月22日周三 00:04写道:

> Le tiistaina 21. toukokuuta 2024, 10.37.51 EEST u...@foxmail.com a écrit :
> > From: sunyuechi 
> > ---
> >  libavcodec/riscv/Makefile  |   2 +
> >  libavcodec/riscv/vvc_mc_rvv.S  | 312 +
> >  libavcodec/riscv/vvcdsp_init.c |  76 
> >  libavcodec/vvc/dsp.c   |   4 +-
> >  libavcodec/vvc/dsp.h   |   1 +
> >  5 files changed, 394 insertions(+), 1 deletion(-)
> >  create mode 100644 libavcodec/riscv/vvc_mc_rvv.S
> >  create mode 100644 libavcodec/riscv/vvcdsp_init.c
> >
> > diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> > index 27b268ae39..6297664fc9 100644
> > --- a/libavcodec/riscv/Makefile
> > +++ b/libavcodec/riscv/Makefile
> > @@ -68,3 +68,5 @@ RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o
> \
> >  RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
> >  OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
> >  RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
> > +OBJS-$(CONFIG_VVC_DECODER) += riscv/vvcdsp_init.o
> > +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc_mc_rvv.o
> > diff --git a/libavcodec/riscv/vvc_mc_rvv.S
> b/libavcodec/riscv/vvc_mc_rvv.S
> > new file mode 100644
> > index 00..26a6afba1f
> > --- /dev/null
> > +++ b/libavcodec/riscv/vvc_mc_rvv.S
> > @@ -0,0 +1,312 @@
> > +/*
> > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> > (ISCAS). + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA + */
> > +
> > +#include "libavutil/riscv/asm.S"
> > +
> > +.macro vsetvlstatic8 w vlen is_w
> > +.if \w <= 2
> > +vsetivlizero, \w, e8, mf8, ta, ma
> > +.elseif \w <= 4 && \vlen == 128
> > +vsetivlizero, \w, e8, mf4, ta, ma
> > +.elseif \w <= 4 && \vlen >= 256
> > +vsetivlizero, \w, e8, mf8, ta, ma
> > +.elseif \w <= 8 && \vlen == 128
> > +vsetivlizero, \w, e8, mf2, ta, ma
> > +.elseif \w <= 8 && \vlen >= 256
> > +vsetivlizero, \w, e8, mf4, ta, ma
> > +.elseif \w <= 16 && \vlen == 128
> > +vsetivlizero, \w, e8, m1, ta, ma
> > +.elseif \w <= 16 && \vlen >= 256
> > +vsetivlizero, \w, e8, mf2, ta, ma
> > +.elseif \w <= 32 && \vlen >= 256
> > +li t0, \w
> > +vsetvli zero, t0, e8, m1, ta, ma
> > +.elseif \w <= (\vlen / 4) || \is_w
> > +li t0, 64
> > +vsetvli zero, t0, e8, m2, ta, ma
> > +.else
> > +li t0, \w
> > +vsetvli zero, t0, e8, m4, ta, ma
> > +.endif
> > +.endm
> > +
> > +.macro vsetvlstatic16 w vlen is_w
> > +.if \w <= 2
> > +vsetivlizero, \w, e16, mf4, ta, ma
> > +.elseif \w <= 4 && \vlen == 128
> > +vsetivlizero, \w, e16, mf2, ta, ma
> > +.elseif \w <= 4 && \vlen >= 256
> > +vsetivlizero, \w, e16, mf4, ta, ma
> > +.elseif \w <= 8 && \vlen == 128
> > +vsetivlizero, \w, e16, m1, ta, ma
> > +.elseif \w <= 8 && \vlen >= 256
> > +vsetivlizero, \w, e16, mf2, ta, ma
> > +.elseif \w <= 16 && \vlen == 128
> > +vsetivlizero, \w, e16, m2, ta, ma
> > +.elseif \w <= 16 && \vlen >= 256
> > +vsetivlizero, \w, e16, m1, ta, ma
> > +.elseif \w <= 32 && \vlen >= 25

Re: [FFmpeg-devel] [PATCH v2 1/5] lavc/vp9dsp: R-V V mc avg

2024-05-21 Thread flow gg
> Please put commas between operands.
> This should probably be ff_avg_vp9 or something slightly more specific.

Updated here.

 于2024年5月22日周三 01:14写道:

> From: sunyuechi 
>
> C908:
> vp9_avg4_8bpp_c: 1.2
> vp9_avg4_8bpp_rvv_i64: 1.0
> vp9_avg8_8bpp_c: 3.7
> vp9_avg8_8bpp_rvv_i64: 1.5
> vp9_avg16_8bpp_c: 14.7
> vp9_avg16_8bpp_rvv_i64: 3.5
> vp9_avg32_8bpp_c: 57.7
> vp9_avg32_8bpp_rvv_i64: 10.0
> vp9_avg64_8bpp_c: 229.0
> vp9_avg64_8bpp_rvv_i64: 31.7
> ---
>  libavcodec/riscv/Makefile  |  3 +-
>  libavcodec/riscv/vp9_mc_rvv.S  | 58 ++
>  libavcodec/riscv/vp9dsp.h  |  4 +--
>  libavcodec/riscv/vp9dsp_init.c | 18 +++
>  4 files changed, 80 insertions(+), 3 deletions(-)
>  create mode 100644 libavcodec/riscv/vp9_mc_rvv.S
>
> diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> index 07d5c2915d..67e198d754 100644
> --- a/libavcodec/riscv/Makefile
> +++ b/libavcodec/riscv/Makefile
> @@ -69,6 +69,7 @@ RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
>  OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
>  RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o \
>   riscv/vp9_mc_rvi.o
> -RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
> +RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o \
> +  riscv/vp9_mc_rvv.o
>  OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
>  RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
> diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
> new file mode 100644
> index 00..7cb38ec94a
> --- /dev/null
> +++ b/libavcodec/riscv/vp9_mc_rvv.S
> @@ -0,0 +1,58 @@
> +/*
> + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> (ISCAS).
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +.macro vsetvlstatic8 len an maxlen mn=m4
> +.if \len == 4
> +vsetivlizero, \len, e8, mf4, ta, ma
> +.elseif \len == 8
> +vsetivlizero, \len, e8, mf2, ta, ma
> +.elseif \len == 16
> +vsetivlizero, \len, e8, m1, ta, ma
> +.elseif \len == 32
> +li  \an, \len
> +vsetvli zero, \an, e8, m2, ta, ma
> +.elseif \len == 64
> +li  \an, \maxlen
> +vsetvli zero, \an, e8, \mn, ta, ma
> +.endif
> +.endm
> +
> +.macro copy_avg len
> +func ff_vp9_avg\len\()_rvv, zve32x
> +csrwi   vxrm, 0
> +vsetvlstatic8   \len, t0, 64
> +1:
> +vle8.v  v8, (a2)
> +vle8.v  v16, (a0)
> +vaaddu.vv   v8, v8, v16
> +addia4, a4, -1
> +vse8.v  v8, (a0)
> +add a2, a2, a3
> +add a0, a0, a1
> +bneza4, 1b
> +ret
> +endfunc
> +.endm
> +
> +.irp len, 64, 32, 16, 8, 4
> +copy_avg \len
> +.endr
> diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
> index 79330b4968..ff8431591c 100644
> --- a/libavcodec/riscv/vp9dsp.h
> +++ b/libavcodec/riscv/vp9dsp.h
> @@ -138,11 +138,11 @@ void ff_avg_bilin_##SIZE##hv_rvv(uint8_t *dst,
> ptrdiff_t dststride,\
>   int h, int mx, int my);
>
>  #define VP9_COPY_AVG_RISCV_RVV_FUNC(SIZE)   \
> -void ff_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride,\
> +void ff_vp9_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride,\
>   const uint8_t *src, ptrdiff_t srcstride,  \
>   int h, int mx, int my);   \
> \
> -void ff_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
> +void ff_vp9_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
>  const uint8_t *src, ptrdiff_t srcstride,   \
>  int h, int mx, int my);
>
> diff --git a/libavcodec/riscv/vp9dsp_init.c
> b/libavcodec/riscv/vp9dsp_init.c
> index ab99294d44..454dcd963f 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -48,6 +48,24 @@ static av_cold void vp9dsp_mc_init_risc

[FFmpeg-devel] [PATCH v2 5/5] lavc/vp9dsp: R-V V mc tap hv

2024-05-21 Thread uk7b
From: sunyuechi 

 C908   X60
vp9_avg_8tap_smooth_4hv_8bpp_c :   32.0   28.2
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32   :   15.0   13.2
vp9_avg_8tap_smooth_8hv_8bpp_c :   98.0   86.2
vp9_avg_8tap_smooth_8hv_8bpp_rvv_i32   :   23.7   21.0
vp9_avg_8tap_smooth_16hv_8bpp_c:  355.5  297.0
vp9_avg_8tap_smooth_16hv_8bpp_rvv_i32  :   62.7   41.2
vp9_avg_8tap_smooth_32hv_8bpp_c: 1273.0 1099.7
vp9_avg_8tap_smooth_32hv_8bpp_rvv_i32  :  133.7  119.2
vp9_avg_8tap_smooth_64hv_8bpp_c: 4933.0 4240.5
vp9_avg_8tap_smooth_64hv_8bpp_rvv_i32  :  506.7  227.0
vp9_put_8tap_smooth_4hv_8bpp_c :   30.2   27.0
vp9_put_8tap_smooth_4hv_8bpp_rvv_i32   :   14.5   12.7
vp9_put_8tap_smooth_8hv_8bpp_c :   91.2   81.2
vp9_put_8tap_smooth_8hv_8bpp_rvv_i32   :   22.7   20.2
vp9_put_8tap_smooth_16hv_8bpp_c:  329.2  277.7
vp9_put_8tap_smooth_16hv_8bpp_rvv_i32  :   44.7   40.0
vp9_put_8tap_smooth_32hv_8bpp_c: 1183.7 1022.7
vp9_put_8tap_smooth_32hv_8bpp_rvv_i32  :  130.7  116.5
vp9_put_8tap_smooth_64hv_8bpp_c: 4502.7 3954.5
vp9_put_8tap_smooth_64hv_8bpp_rvv_i32  :  496.0  224.7
---
 libavcodec/riscv/vp9_mc_rvv.S  | 75 ++
 libavcodec/riscv/vp9dsp_init.c |  8 
 2 files changed, 83 insertions(+)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index d7db775df7..06c79b16f7 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -362,6 +362,77 @@ func 
ff_\op\()_vp9_8tap_\name\()_\len\()\type\()_rvv\vlen\(), zve32x
 endfunc
 .endm
 
+#if __riscv_xlen == 64
+.macro epel_hv_once len name op
+sub a2, a2, a3
+sub a2, a2, a3
+sub a2, a2, a3
+.irp n,0,2,4,6,8,10,12,14
+epel_load_inc   v\n, \len, put, \name, h, 1, t
+.endr
+addia4, a4, -1
+1:
+addia4, a4, -1
+epel_load   v30, \len, \op, \name, v, 0, s
+vse8.v  v30, (a0)
+vmv.v.v v0, v2
+vmv.v.v v2, v4
+vmv.v.v v4, v6
+vmv.v.v v6, v8
+vmv.v.v v8, v10
+vmv.v.v v10, v12
+vmv.v.v v12, v14
+epel_load   v14, \len, put, \name, h, 1, t
+add a2, a2, a3
+add a0, a0, a1
+bneza4, 1b
+epel_load   v30, \len, \op, \name, v, 0, s
+vse8.v  v30, (a0)
+.endm
+
+.macro epel_hv op name len vlen
+func ff_\op\()_vp9_8tap_\name\()_\len\()hv_rvv\vlen\(), zve32x
+addisp, sp, -64
+.irp n,0,1,2,3,4,5,6,7
+sd  s\n, \n\()<<3(sp)
+.endr
+.if \len == 64 && \vlen < 256
+addisp, sp, -48
+.irp n,0,1,2,3,4,5
+sd  a\n, \n\()<<3(sp)
+.endr
+.endif
+.ifc \op,avg
+csrwi   vxrm, 0
+.endif
+epel_filter \name, h, t
+epel_filter \name, v, s
+.if \vlen < 256
+vsetvlstatic8   \len, a6, 32, m2
+.else
+vsetvlstatic8   \len, a6, 64, m2
+.endif
+epel_hv_once\len, \name, \op
+.if \len == 64 && \vlen < 256
+.irp n,0,1,2,3,4,5
+ld  a\n, \n\()<<3(sp)
+.endr
+addisp, sp, 48
+addia0, a0, 32
+addia2, a2, 32
+epel_filter \name, h, t
+epel_hv_once\len, \name, \op
+.endif
+.irp n,0,1,2,3,4,5,6,7
+ld  s\n, \n\()<<3(sp)
+.endr
+addisp, sp, 64
+
+ret
+endfunc
+.endm
+#endif
+
 .irp len, 64, 32, 16, 8, 4
 copy_avg \len
 .irp op, put, avg
@@ -373,6 +444,10 @@ endfunc
 epel \len, \op, \name, \type, 128
 epel \len, \op, \name, \type, 256
 .endr
+#if __riscv_xlen == 64
+epel_hv \op, \name, \len, 128
+epel_hv \op, \name, \len, 256
+#endif
 .endr
 .endr
 .endr
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index be5369d506..887dba461f 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -118,6 +118,10 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext 
*dsp, int bpp)
 if (flags & AV_CPU_FLAG_RVB_ADDR) {
 init_subpel2(0, 0, 1, v, put, 128);
 init_subpel2(1, 0, 1, v, avg, 128);
+# if __riscv_xlen == 64
+init_subpel2(0, 1, 1, hv, put, 128);
+init_subpel2(1, 1, 1, hv, avg, 128);
+# endif
 }
 

[FFmpeg-devel] [PATCH v2 4/5] lavc/vp9dsp: R-V V mc bilin hv

2024-05-21 Thread uk7b
From: sunyuechi 

C908:
vp9_avg_bilin_4hv_8bpp_c: 11.0
vp9_avg_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_avg_bilin_16hv_8bpp_c: 147.0
vp9_avg_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_avg_bilin_32hv_8bpp_c: 574.5
vp9_avg_bilin_32hv_8bpp_rvv_i64: 42.7
vp9_avg_bilin_64hv_8bpp_c: 2311.5
vp9_avg_bilin_64hv_8bpp_rvv_i64: 201.7
vp9_put_bilin_4hv_8bpp_c: 10.0
vp9_put_bilin_4hv_8bpp_rvv_i64: 3.2
vp9_put_bilin_8hv_8bpp_c: 35.2
vp9_put_bilin_8hv_8bpp_rvv_i64: 6.5
vp9_put_bilin_16hv_8bpp_c: 133.7
vp9_put_bilin_16hv_8bpp_rvv_i64: 13.0
vp9_put_bilin_32hv_8bpp_c: 538.2
vp9_put_bilin_32hv_8bpp_rvv_i64: 39.7
vp9_put_bilin_64hv_8bpp_c: 2114.0
vp9_put_bilin_64hv_8bpp_rvv_i64: 153.7
---
 libavcodec/riscv/vp9_mc_rvv.S  | 34 ++
 libavcodec/riscv/vp9dsp_init.c | 10 ++
 2 files changed, 44 insertions(+)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index adba4afb90..d7db775df7 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -104,6 +104,39 @@ func ff_\op\()_vp9_bilin_\len\()\type\()_rvv, zve32x
 endfunc
 .endm
 
+.macro bilin_hv len op
+func ff_\op\()_vp9_bilin_\len\()hv_rvv, zve32x
+.ifc \op,avg
+csrwi   vxrm, 0
+.endif
+vsetvlstatic8   \len, t0, 64
+neg t1, a5
+neg t2, a6
+li  t4, 8
+bilin_load  v24, \len, put, h, a5
+add a2, a2, a3
+1:
+addia4, a4, -1
+bilin_load  v4, \len, put, h, a5
+vwmulu.vx   v16, v4, a6
+vwmaccsu.vx v16, t2, v24
+vwadd.wxv16, v16, t4
+vnsra.wiv16, v16, 4
+vadd.vv v0, v16, v24
+.ifc \op,avg
+vle8.v  v16, (a0)
+vaaddu.vv   v0, v0, v16
+.endif
+vse8.v  v0, (a0)
+vmv.v.v v24, v4
+add a2, a2, a3
+add a0, a0, a1
+bneza4, 1b
+
+ret
+endfunc
+.endm
+
 const subpel_filters_regular
 .byte  0,  0,   0, 128,   0,   0,  0,  0
 .byte  0,  1,  -5, 126,   8,  -3,  1,  0
@@ -334,6 +367,7 @@ endfunc
 .irp op, put, avg
 bilin_h_v \len, \op, h, a5
 bilin_h_v \len, \op, v, a6
+bilin_hv \len, \op
 .irp name, regular, sharp, smooth
 .irp type, h, v
 epel \len, \op, \name, \type, 128
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 314a1e5808..be5369d506 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -84,6 +84,16 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, 
int bpp)
 dsp->mc[4][FILTER_BILINEAR ][0][1][0] = ff_put_vp9_bilin_4h_rvv;
 dsp->mc[4][FILTER_BILINEAR ][1][0][1] = ff_avg_vp9_bilin_4v_rvv;
 dsp->mc[4][FILTER_BILINEAR ][1][1][0] = ff_avg_vp9_bilin_4h_rvv;
+dsp->mc[0][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_64hv_rvv;
+dsp->mc[0][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_64hv_rvv;
+dsp->mc[1][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_32hv_rvv;
+dsp->mc[1][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_32hv_rvv;
+dsp->mc[2][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_16hv_rvv;
+dsp->mc[2][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_16hv_rvv;
+dsp->mc[3][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_8hv_rvv;
+dsp->mc[3][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_8hv_rvv;
+dsp->mc[4][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_4hv_rvv;
+dsp->mc[4][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_4hv_rvv;
 
 #undef init_fpel
 
-- 
2.45.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2 3/5] lavc/vp9dsp: R-V V mc tap h v

2024-05-21 Thread uk7b
From: sunyuechi 

 C908   X60
vp9_avg_8tap_smooth_4h_8bpp_c  :   13.0   11.2
vp9_avg_8tap_smooth_4h_8bpp_rvv_i32:    5.0    4.2
vp9_avg_8tap_smooth_4v_8bpp_c  :   13.7   12.5
vp9_avg_8tap_smooth_4v_8bpp_rvv_i32:    5.0    4.2
vp9_avg_8tap_smooth_8h_8bpp_c  :   49.5   42.2
vp9_avg_8tap_smooth_8h_8bpp_rvv_i32:    9.2    8.5
vp9_avg_8tap_smooth_8v_8bpp_c  :   66.5   45.0
vp9_avg_8tap_smooth_8v_8bpp_rvv_i32:    9.5    8.5
vp9_avg_8tap_smooth_16h_8bpp_c :  192.7  166.5
vp9_avg_8tap_smooth_16h_8bpp_rvv_i32   :   21.2   18.7
vp9_avg_8tap_smooth_16v_8bpp_c :  192.2  175.7
vp9_avg_8tap_smooth_16v_8bpp_rvv_i32   :   21.5   19.0
vp9_avg_8tap_smooth_32h_8bpp_c :  780.2  663.7
vp9_avg_8tap_smooth_32h_8bpp_rvv_i32   :   83.5   60.0
vp9_avg_8tap_smooth_32v_8bpp_c :  770.5  689.2
vp9_avg_8tap_smooth_32v_8bpp_rvv_i32   :   67.2   60.0
vp9_avg_8tap_smooth_64h_8bpp_c : 3115.5 2647.2
vp9_avg_8tap_smooth_64h_8bpp_rvv_i32   :  283.5  119.2
vp9_avg_8tap_smooth_64v_8bpp_c : 3082.2 2729.0
vp9_avg_8tap_smooth_64v_8bpp_rvv_i32   :  305.2  119.0
vp9_put_8tap_smooth_4h_8bpp_c  :   11.2    9.7
vp9_put_8tap_smooth_4h_8bpp_rvv_i32:    4.2    4.0
vp9_put_8tap_smooth_4v_8bpp_c  :   11.7   10.7
vp9_put_8tap_smooth_4v_8bpp_rvv_i32:    4.2    4.0
vp9_put_8tap_smooth_8h_8bpp_c  :   42.0   37.5
vp9_put_8tap_smooth_8h_8bpp_rvv_i32:    8.5    7.7
vp9_put_8tap_smooth_8v_8bpp_c  :   44.2   38.7
vp9_put_8tap_smooth_8v_8bpp_rvv_i32:    8.5    7.7
vp9_put_8tap_smooth_16h_8bpp_c :  165.7  147.2
vp9_put_8tap_smooth_16h_8bpp_rvv_i32   :   19.5   17.5
vp9_put_8tap_smooth_16v_8bpp_c :  169.0  149.7
vp9_put_8tap_smooth_16v_8bpp_rvv_i32   :   19.7   17.5
vp9_put_8tap_smooth_32h_8bpp_c :  659.7  586.7
vp9_put_8tap_smooth_32h_8bpp_rvv_i32   :   64.2   57.2
vp9_put_8tap_smooth_32v_8bpp_c :  680.5  591.2
vp9_put_8tap_smooth_32v_8bpp_rvv_i32   :   64.2   57.2
vp9_put_8tap_smooth_64h_8bpp_c : 2681.5 2339.0
vp9_put_8tap_smooth_64h_8bpp_rvv_i32   :  255.5  114.2
vp9_put_8tap_smooth_64v_8bpp_c : 2709.7 2348.7
vp9_put_8tap_smooth_64v_8bpp_rvv_i32   :  255.5  114.0
---
 libavcodec/riscv/vp9_mc_rvv.S  | 243 +
 libavcodec/riscv/vp9dsp.h  |  72 ++
 libavcodec/riscv/vp9dsp_init.c |  38 +-
 3 files changed, 328 insertions(+), 25 deletions(-)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 739380d9a9..adba4afb90 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -36,6 +36,18 @@
 .endif
 .endm
 
+.macro vsetvlstatic16 len
+.ifc \len,4
+vsetvli zero, zero, e16, mf2, ta, ma
+.elseif \len == 8
+vsetvli zero, zero, e16, m1, ta, ma
+.elseif \len == 16
+vsetvli zero, zero, e16, m2, ta, ma
+.else
+vsetvli zero, zero, e16, m4, ta, ma
+.endif
+.endm
+
 .macro copy_avg len
 func ff_vp9_avg\len\()_rvv, zve32x
 csrwi   vxrm, 0
@@ -92,10 +104,241 @@ func ff_\op\()_vp9_bilin_\len\()\type\()_rvv, zve32x
 endfunc
 .endm
 
+const subpel_filters_regular
+.byte  0,  0,   0, 128,   0,   0,  0,  0
+.byte  0,  1,  -5, 126,   8,  -3,  1,  0
+.byte -1,  3, -10, 122,  18,  -6,  2,  0
+.byte -1,  4, -13, 118,  27,  -9,  3, -1
+.byte -1,  4, -16, 112,  37, -11,  4, -1
+.byte -1,  5, -18, 105,  48, -14,  4, -1
+.byte -1,  5, -19,  97,  58, -16,  5, -1
+.byte -1,  6, -19,  88,  68, -18,  5, -1
+.byte -1,  6, -19,  78,  78, -19,  6, -1
+.byte -1,  5, -18,  68,  88, -19,  6, -1
+.byte -1,  5, -16,  58,  97, -19,  5, -1
+.byte -1,  4, -14,  48, 105, -18,  5, -1
+.byte -1,  4, -11,  37, 112, -16,  4, -1
+.byte -1,  3,  -9,  27, 118, -13,  4, -1
+.byte  0,  2,  -6,  18, 122, -10,  3, -1
+.byte  0,  1,  -3,   8, 126,  -5,  1,  0
+subpel_filters_sharp:
+.byte  0,  0,   0, 128,   0,   0,  0,  0
+.byte -1,  3,  -7, 127,   8,  -3,  1,  0
+.byte -2,  5, -13, 125,  17,  -6,  3, -1
+.byte -3,  7, -17, 121,  27, -10,  5, -2
+.byte -4,  9, -20, 115,  37, -13,  6, -2
+.byte -4, 10, -23, 108,  48, -16,  8, -3
+.byte -4, 10, -24, 100,  59, -19,  9, -3
+.byte -4, 11, -24,  90,  70, -21, 10, -4
+.byte -4, 11, -23,  80,  80, -23,

[FFmpeg-devel] [PATCH v2 2/5] lavc/vp9dsp: R-V V mc bilin h v

2024-05-21 Thread uk7b
From: sunyuechi 

C908:
vp9_avg_bilin_4h_8bpp_c: 5.2
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.2
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8h_8bpp_c: 20.0
vp9_avg_bilin_8h_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8v_8bpp_c: 21.0
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16h_8bpp_c: 78.2
vp9_avg_bilin_16h_8bpp_rvv_i64: 9.0
vp9_avg_bilin_16v_8bpp_c: 82.0
vp9_avg_bilin_16v_8bpp_rvv_i64: 9.0
vp9_avg_bilin_32h_8bpp_c: 325.5
vp9_avg_bilin_32h_8bpp_rvv_i64: 26.2
vp9_avg_bilin_32v_8bpp_c: 326.2
vp9_avg_bilin_32v_8bpp_rvv_i64: 26.2
vp9_avg_bilin_64h_8bpp_c: 1265.7
vp9_avg_bilin_64h_8bpp_rvv_i64: 91.5
vp9_avg_bilin_64v_8bpp_c: 1317.0
vp9_avg_bilin_64v_8bpp_rvv_i64: 91.2
vp9_put_bilin_4h_8bpp_c: 4.5
vp9_put_bilin_4h_8bpp_rvv_i64: 1.7
vp9_put_bilin_4v_8bpp_c: 4.7
vp9_put_bilin_4v_8bpp_rvv_i64: 1.7
vp9_put_bilin_8h_8bpp_c: 17.0
vp9_put_bilin_8h_8bpp_rvv_i64: 3.5
vp9_put_bilin_8v_8bpp_c: 18.0
vp9_put_bilin_8v_8bpp_rvv_i64: 3.5
vp9_put_bilin_16h_8bpp_c: 65.2
vp9_put_bilin_16h_8bpp_rvv_i64: 7.5
vp9_put_bilin_16v_8bpp_c: 85.7
vp9_put_bilin_16v_8bpp_rvv_i64: 7.5
vp9_put_bilin_32h_8bpp_c: 257.5
vp9_put_bilin_32h_8bpp_rvv_i64: 23.5
vp9_put_bilin_32v_8bpp_c: 274.5
vp9_put_bilin_32v_8bpp_rvv_i64: 23.5
vp9_put_bilin_64h_8bpp_c: 1040.5
vp9_put_bilin_64h_8bpp_rvv_i64: 82.5
vp9_put_bilin_64v_8bpp_c: 1108.7
vp9_put_bilin_64v_8bpp_rvv_i64: 82.2
---
 libavcodec/riscv/vp9_mc_rvv.S  | 43 ++
 libavcodec/riscv/vp9dsp.h  | 12 +-
 libavcodec/riscv/vp9dsp_init.c | 21 +
 3 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 7cb38ec94a..739380d9a9 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -53,6 +53,49 @@ func ff_vp9_avg\len\()_rvv, zve32x
 endfunc
 .endm
 
+.macro bilin_load dst len op type mn
+.ifc \type,v
+add t5, a2, a3
+.else
+addit5, a2, 1
+.endif
+vle8.v  v8, (a2)
+vle8.v  v0, (t5)
+vwmulu.vx   v16, v0, \mn
+vwmaccsu.vx v16, t1, v8
+vwadd.wxv16, v16, t4
+vnsra.wiv16, v16, 4
+vadd.vv \dst, v16, v8
+.ifc \op,avg
+vle8.v  v16, (a0)
+vaaddu.vv   \dst, \dst, v16
+.endif
+.endm
+
+.macro bilin_h_v len op type mn
+func ff_\op\()_vp9_bilin_\len\()\type\()_rvv, zve32x
+.ifc \op,avg
+csrwi   vxrm, 0
+.endif
+vsetvlstatic8   \len, t0, 64
+li  t4, 8
+neg t1, \mn
+1:
+addia4, a4, -1
+bilin_load  v0, \len, \op, \type, \mn
+vse8.v  v0, (a0)
+add a2, a2, a3
+add a0, a0, a1
+bneza4, 1b
+
+ret
+endfunc
+.endm
+
 .irp len, 64, 32, 16, 8, 4
 copy_avg \len
+.irp op, put, avg
+bilin_h_v \len, \op, h, a5
+bilin_h_v \len, \op, v, a6
+.endr
 .endr
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
index ff8431591c..8fb326dae0 100644
--- a/libavcodec/riscv/vp9dsp.h
+++ b/libavcodec/riscv/vp9dsp.h
@@ -113,27 +113,27 @@ void ff_avg_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, 
ptrdiff_t dststride,  \
  int h, int mx, int my);
 
 #define VP9_BILINEAR_RISCV_RVV_FUNC(SIZE)   \
-void ff_put_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+void ff_put_vp9_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
 const uint8_t *src, ptrdiff_t srcstride,   \
 int h, int mx, int my);\
\
-void ff_put_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+void ff_put_vp9_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
 const uint8_t *src, ptrdiff_t srcstride,   \
 int h, int mx, int my);\
\
-void ff_put_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride,\
+void ff_put_vp9_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride,\
  const uint8_t *src, ptrdiff_t srcstride,  \
  int h, int mx, int my);   \
\
-void ff_avg_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+void ff_avg_vp9_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
 const uint8_t *src, ptrdiff_t srcstride,   \
 int h, int mx, int my);\
   

[FFmpeg-devel] [PATCH v2 1/5] lavc/vp9dsp: R-V V mc avg

2024-05-21 Thread uk7b
From: sunyuechi 

C908:
vp9_avg4_8bpp_c: 1.2
vp9_avg4_8bpp_rvv_i64: 1.0
vp9_avg8_8bpp_c: 3.7
vp9_avg8_8bpp_rvv_i64: 1.5
vp9_avg16_8bpp_c: 14.7
vp9_avg16_8bpp_rvv_i64: 3.5
vp9_avg32_8bpp_c: 57.7
vp9_avg32_8bpp_rvv_i64: 10.0
vp9_avg64_8bpp_c: 229.0
vp9_avg64_8bpp_rvv_i64: 31.7
---
 libavcodec/riscv/Makefile  |  3 +-
 libavcodec/riscv/vp9_mc_rvv.S  | 58 ++
 libavcodec/riscv/vp9dsp.h  |  4 +--
 libavcodec/riscv/vp9dsp_init.c | 18 +++
 4 files changed, 80 insertions(+), 3 deletions(-)
 create mode 100644 libavcodec/riscv/vp9_mc_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 07d5c2915d..67e198d754 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -69,6 +69,7 @@ RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
 OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
 RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o \
  riscv/vp9_mc_rvi.o
-RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
+RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o \
+  riscv/vp9_mc_rvv.o
 OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
 RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
new file mode 100644
index 00..7cb38ec94a
--- /dev/null
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro vsetvlstatic8 len an maxlen mn=m4
+.if \len == 4
+vsetivlizero, \len, e8, mf4, ta, ma
+.elseif \len == 8
+vsetivlizero, \len, e8, mf2, ta, ma
+.elseif \len == 16
+vsetivlizero, \len, e8, m1, ta, ma
+.elseif \len == 32
+li  \an, \len
+vsetvli zero, \an, e8, m2, ta, ma
+.elseif \len == 64
+li  \an, \maxlen
+vsetvli zero, \an, e8, \mn, ta, ma
+.endif
+.endm
+
+.macro copy_avg len
+func ff_vp9_avg\len\()_rvv, zve32x
+csrwi   vxrm, 0
+vsetvlstatic8   \len, t0, 64
+1:
+vle8.v  v8, (a2)
+vle8.v  v16, (a0)
+vaaddu.vv   v8, v8, v16
+addia4, a4, -1
+vse8.v  v8, (a0)
+add a2, a2, a3
+add a0, a0, a1
+bneza4, 1b
+ret
+endfunc
+.endm
+
+.irp len, 64, 32, 16, 8, 4
+copy_avg \len
+.endr
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
index 79330b4968..ff8431591c 100644
--- a/libavcodec/riscv/vp9dsp.h
+++ b/libavcodec/riscv/vp9dsp.h
@@ -138,11 +138,11 @@ void ff_avg_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t 
dststride,\
  int h, int mx, int my);
 
 #define VP9_COPY_AVG_RISCV_RVV_FUNC(SIZE)   \
-void ff_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride,\
+void ff_vp9_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride,\
  const uint8_t *src, ptrdiff_t srcstride,  \
  int h, int mx, int my);   \
\
-void ff_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
+void ff_vp9_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
 const uint8_t *src, ptrdiff_t srcstride,   \
 int h, int mx, int my);
 
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index ab99294d44..454dcd963f 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -48,6 +48,24 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, 
int bpp)
 }
 # endif
 
+#if HAVE_RVV
+if (bpp == 8 && (flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
+
+#define init_fpel(idx1, sz)   \
+dsp->mc[idx1][FILTER_8TAP_SMOOTH ][1][0][0] = ff_vp9_avg##sz##_rvv;  \
+dsp->mc[idx1][FILTER_8TAP_REGULAR][1][0][0] = ff_vp9_avg##sz##_rvv;  \
+dsp->mc[idx1][FILTER_

Re: [FFmpeg-devel] [PATCH v12 0/8] [WIP] webp: add support for animated WebP decoding

2024-05-21 Thread Thilo Borgmann via ffmpeg-devel

Hi,

[...]

Tests mostly work for me. There are a few images (that I reported
earlier) that give:


thanks for testing!



Canvas change detected. The output will be damaged. Use -threads 1
to try decoding with best effort.
They don't animate without that option and with it render incorrectly.


That issue stems from the canvas frame being the synchronization object
(ThreadFrame) - doing so prevents the canvas size from changing mid-stream.
_Maybe_ this can be fixed by switching the whole frame multithreading away
from ThreadFrame to something else; I'm not sure though, and I have no
experience with the alternatives (AVExecutor?). Maybe Andreas can predict
if it's worth/valid to change that whole part of it? I'm not against putting
more effort into it to get it right.


I could fix 488x488.webp and have an almost identical output to libwebp.

488x488.webp features an ARGB canvas and has both ARGB and YUVA420P
p-frames.


Do you have more files with other variations of canvas & p-frames, if
they exist at all... e.g. canvas YUV and p-frames RGB?


Pinged Meta as well for real-world samples. Will take some more days 
until I get feedback. Will then post the next iteration...


Thanks,
Thilo
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v9 11/13] avutil/hwcontext_d3d12va: add Flags for resource creation

2024-05-21 Thread Andrew Sayers
(Only reviewing documentation, not code)

On Mon, May 20, 2024 at 10:52:20PM +0800, tong1.wu-at-intel@ffmpeg.org 
wrote:
> From: Tong Wu 
> 
> Flags field is added to support different resource creation.
> 
> Signed-off-by: Tong Wu 
> ---
>  doc/APIchanges| 3 +++
>  libavutil/hwcontext_d3d12va.c | 2 +-
>  libavutil/hwcontext_d3d12va.h | 8 
>  libavutil/version.h   | 2 +-
>  4 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/APIchanges b/doc/APIchanges
> index 269fd36559..808ba02f2d 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -2,6 +2,9 @@ The last version increases of all libraries were on 2024-03-07
>  
>  API changes, most recent first:
>  
> +2024-01-xx - xx - lavu 59.20.100 - hwcontext_d3d12va.h
> + Add AVD3D12VAFramesContext.flags
> +
>  2024-05-xx - xx - lavu 59.19.100 - hwcontext_qsv.h
>Add AVQSVFramesContext.info
>  
> diff --git a/libavutil/hwcontext_d3d12va.c b/libavutil/hwcontext_d3d12va.c
> index cfc016315d..6507cf69c1 100644
> --- a/libavutil/hwcontext_d3d12va.c
> +++ b/libavutil/hwcontext_d3d12va.c
> @@ -247,7 +247,7 @@ static AVBufferRef *d3d12va_pool_alloc(void *opaque, 
> size_t size)
>  .Format   = hwctx->format,
>  .SampleDesc   = {.Count = 1, .Quality = 0 },
>  .Layout   = D3D12_TEXTURE_LAYOUT_UNKNOWN,
> -.Flags= D3D12_RESOURCE_FLAG_NONE,
> +.Flags= hwctx->flags,
>  };
>  
>  frame = av_mallocz(sizeof(AVD3D12VAFrame));
> diff --git a/libavutil/hwcontext_d3d12va.h b/libavutil/hwcontext_d3d12va.h
> index ff06e6f2ef..608dbac97f 100644
> --- a/libavutil/hwcontext_d3d12va.h
> +++ b/libavutil/hwcontext_d3d12va.h
> @@ -129,6 +129,14 @@ typedef struct AVD3D12VAFramesContext {
>   * If unset, will be automatically set.
>   */
>  DXGI_FORMAT format;
> +
> +/**
> + * This field is used to specify options for working with resources.
> + * If unset, this will be D3D12_RESOURCE_FLAG_NONE.
> + *
> + * @see: 
> https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_resource_flags.
> + */
> +D3D12_RESOURCE_FLAGS flags;

Some nitpicks:

* "This field is used to specify" is redundant, you can save the reader
  a few seconds by starting the sentence with just "Options..."
* "@see" starts a paragraph, so the rendered documentation will look better
  without the ":"
* the full stop after the URL makes it harder to copy/paste the text -
  remove the full stop or use a [markdown link](...)
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] 回复: [PATCH] x86/vvc_alf: use the x86inc instruction macros

2024-05-21 Thread Wu Jianhua
> 发件人: ffmpeg-devel  代表 James Almer 
> 
> 发送时间: 2024年5月21日 6:52
> 收件人: ffmpeg-devel@ffmpeg.org
> 主题: [FFmpeg-devel] [PATCH] x86/vvc_alf: use the x86inc instruction macros
> 
> Let its magic figure out the correct mnemonic based on target instruction set.
> 
> Signed-off-by: James Almer 
> ---
>  libavcodec/x86/vvc/vvc_alf.asm | 202 -
>  1 file changed, 101 insertions(+), 101 deletions(-)

I tested this patch and LGTM. Thanks for updating them. 

And would it be better to add avcodec to the path of the commit message?

Thanks,
Jianhua
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] lavc/vvc_mc: R-V V avg w_avg

2024-05-21 Thread Rémi Denis-Courmont
Le tiistaina 21. toukokuuta 2024, 10.37.51 EEST u...@foxmail.com a écrit :
> From: sunyuechi 
> ---
>  libavcodec/riscv/Makefile  |   2 +
>  libavcodec/riscv/vvc_mc_rvv.S  | 312 +
>  libavcodec/riscv/vvcdsp_init.c |  76 
>  libavcodec/vvc/dsp.c   |   4 +-
>  libavcodec/vvc/dsp.h   |   1 +
>  5 files changed, 394 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/riscv/vvc_mc_rvv.S
>  create mode 100644 libavcodec/riscv/vvcdsp_init.c
> 
> diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> index 27b268ae39..6297664fc9 100644
> --- a/libavcodec/riscv/Makefile
> +++ b/libavcodec/riscv/Makefile
> @@ -68,3 +68,5 @@ RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o \
>  RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
>  OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
>  RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
> +OBJS-$(CONFIG_VVC_DECODER) += riscv/vvcdsp_init.o
> +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc_mc_rvv.o
> diff --git a/libavcodec/riscv/vvc_mc_rvv.S b/libavcodec/riscv/vvc_mc_rvv.S
> new file mode 100644
> index 00..26a6afba1f
> --- /dev/null
> +++ b/libavcodec/riscv/vvc_mc_rvv.S
> @@ -0,0 +1,312 @@
> +/*
> + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> (ISCAS). + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +.macro vsetvlstatic8 w vlen is_w
> +.if \w <= 2
> +vsetivlizero, \w, e8, mf8, ta, ma
> +.elseif \w <= 4 && \vlen == 128
> +vsetivlizero, \w, e8, mf4, ta, ma
> +.elseif \w <= 4 && \vlen >= 256
> +vsetivlizero, \w, e8, mf8, ta, ma
> +.elseif \w <= 8 && \vlen == 128
> +vsetivlizero, \w, e8, mf2, ta, ma
> +.elseif \w <= 8 && \vlen >= 256
> +vsetivlizero, \w, e8, mf4, ta, ma
> +.elseif \w <= 16 && \vlen == 128
> +vsetivlizero, \w, e8, m1, ta, ma
> +.elseif \w <= 16 && \vlen >= 256
> +vsetivlizero, \w, e8, mf2, ta, ma
> +.elseif \w <= 32 && \vlen >= 256
> +li t0, \w
> +vsetvli zero, t0, e8, m1, ta, ma
> +.elseif \w <= (\vlen / 4) || \is_w
> +li t0, 64
> +vsetvli zero, t0, e8, m2, ta, ma
> +.else
> +li t0, \w
> +vsetvli zero, t0, e8, m4, ta, ma
> +.endif
> +.endm
> +
> +.macro vsetvlstatic16 w vlen is_w
> +.if \w <= 2
> +vsetivlizero, \w, e16, mf4, ta, ma
> +.elseif \w <= 4 && \vlen == 128
> +vsetivlizero, \w, e16, mf2, ta, ma
> +.elseif \w <= 4 && \vlen >= 256
> +vsetivlizero, \w, e16, mf4, ta, ma
> +.elseif \w <= 8 && \vlen == 128
> +vsetivlizero, \w, e16, m1, ta, ma
> +.elseif \w <= 8 && \vlen >= 256
> +vsetivlizero, \w, e16, mf2, ta, ma
> +.elseif \w <= 16 && \vlen == 128
> +vsetivlizero, \w, e16, m2, ta, ma
> +.elseif \w <= 16 && \vlen >= 256
> +vsetivlizero, \w, e16, m1, ta, ma
> +.elseif \w <= 32 && \vlen >= 256
> +li t0, \w
> +vsetvli zero, t0, e16, m2, ta, ma
> +.elseif \w <= (\vlen / 4) || \is_w
> +li t0, 64
> +vsetvli zero, t0, e16, m4, ta, ma
> +.else
> +li t0, \w
> +vsetvli zero, t0, e16, m8, ta, ma
> +.endif
> +.endm
> +
> +.macro vsetvlstatic32 w vlen
> +.if \w <= 2
> +vsetivlizero, \w, e32, mf2, ta, ma
> +.elseif \w <= 4 && \vlen == 128
> +vsetivlizero, \w, e32, m1, ta, ma
> +.elseif \w <= 4 && \vlen >= 256
> +vsetivlizero, \w, e32, mf2, ta, ma
> +.elseif \w <= 8 && \vlen == 128
> +vsetivlizero, \w, e32, m2, ta, ma
> +.elseif \w <= 8 && \vlen >= 256
> +  

Re: [FFmpeg-devel] [PATCH v4 1/5] lavc/vp9dsp: R-V V mc avg

2024-05-21 Thread Rémi Denis-Courmont
Le tiistaina 21. toukokuuta 2024, 18.36.02 EEST flow gg a écrit :
> > Please put commas between operands.
> 
> Okay
> 
> > This should probably be ff_avg_vp9 or something slightly more specific.
> 
> Is it necessary here?

It works because VP9 is the odd one out doing this. All non-static symbols in 
libavcodec are in the same namespace.

> Many macros in the C file are copied from MIPS, where
> it is called ff_avg4_msa. Here, it has been simply changed to ff_avg4_rvv.

AArch64 uses ff_vp9_avg which seems a lot saner.


-- 
Rémi Denis-Courmont
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] checkasm: print bench runs when benchmarking

2024-05-21 Thread Lynne via ffmpeg-devel

On 21/05/2024 17:07, J. Dekker wrote:


Lynne via ffmpeg-devel  writes:


Helps make sense of the possible noise in the results.
---
  tests/checkasm/checkasm.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 669f2be9c1..b1c175b95d 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -892,6 +892,9 @@ int main(int argc, char *argv[])
  fprintf(stderr, "checkasm: using random seed %u\n", seed);
  av_lfg_init(&checkasm_lfg, seed);
  
+if (state.bench_pattern)

+fprintf(stderr, "checkasm: bench runs %" PRIu64 " (1 << %i)\n", 
bench_runs, av_log2(bench_runs));
+
  check_cpu_flag(NULL, 0);
  for (i = 0; cpus[i].flag; i++)
  check_cpu_flag(cpus[i].name, cpus[i].flag);


LGTM.



Thanks, pushed


OpenPGP_0xA2FEA5F03F034464.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v4 1/5] lavc/vp9dsp: R-V V mc avg

2024-05-21 Thread flow gg
> Please put commas between operands.

Okay

> This should probably be ff_avg_vp9 or something slightly more specific.

Is it necessary here? Many macros in the C file are copied from MIPS, where
it is called ff_avg4_msa. Here, it has been simply changed to ff_avg4_rvv.

Rémi Denis-Courmont  于2024年5月21日周二 23:24写道:

> Le lauantaina 18. toukokuuta 2024, 21.15.29 EEST u...@foxmail.com a écrit
> :
> > From: sunyuechi 
> >
> > C908:
> > vp9_avg4_8bpp_c: 1.2
> > vp9_avg4_8bpp_rvv_i64: 1.0
> > vp9_avg8_8bpp_c: 3.7
> > vp9_avg8_8bpp_rvv_i64: 1.5
> > vp9_avg16_8bpp_c: 14.7
> > vp9_avg16_8bpp_rvv_i64: 3.5
> > vp9_avg32_8bpp_c: 57.7
> > vp9_avg32_8bpp_rvv_i64: 10.0
> > vp9_avg64_8bpp_c: 229.0
> > vp9_avg64_8bpp_rvv_i64: 31.7
> > ---
> >  libavcodec/riscv/Makefile  |  3 +-
> >  libavcodec/riscv/vp9_mc_rvv.S  | 58 ++
> >  libavcodec/riscv/vp9dsp_init.c | 18 +++
> >  3 files changed, 78 insertions(+), 1 deletion(-)
> >  create mode 100644 libavcodec/riscv/vp9_mc_rvv.S
> >
> > diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> > index 27b268ae39..4739d83522 100644
> > --- a/libavcodec/riscv/Makefile
> > +++ b/libavcodec/riscv/Makefile
> > @@ -65,6 +65,7 @@ RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
> >  OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
> >  RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o \
> >   riscv/vp9_mc_rvi.o
> > -RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
> > +RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o \
> > +  riscv/vp9_mc_rvv.o
> >  OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
> >  RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
> > diff --git a/libavcodec/riscv/vp9_mc_rvv.S
> b/libavcodec/riscv/vp9_mc_rvv.S
> > new file mode 100644
> > index 00..7811cd9928
> > --- /dev/null
> > +++ b/libavcodec/riscv/vp9_mc_rvv.S
> > @@ -0,0 +1,58 @@
> > +/*
> > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> > (ISCAS). + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA + */
> > +
> > +#include "libavutil/riscv/asm.S"
> > +
> > +.macro vsetvlstatic8 len an maxlen mn=m4
>
> Please put commas between operands.
>
> > +.if \len == 4
> > +vsetivlizero, \len, e8, mf4, ta, ma
> > +.elseif \len == 8
> > +vsetivlizero, \len, e8, mf2, ta, ma
> > +.elseif \len == 16
> > +vsetivlizero, \len, e8, m1, ta, ma
> > +.elseif \len == 32
> > +li  \an, \len
> > +vsetvli zero, \an, e8, m2, ta, ma
> > +.elseif \len == 64
> > +li  \an, \maxlen
> > +vsetvli zero, \an, e8, \mn, ta, ma
> > +.endif
> > +.endm
> > +
> > +.macro copy_avg len
> > +func ff_avg\len\()_rvv, zve32x
>
> This should probably be ff_avg_vp9 or something slightly more specific.
>
> > +csrwi   vxrm, 0
> > +vsetvlstatic8   \len t0 64
> > +1:
> > +vle8.v  v8, (a2)
> > +vle8.v  v16, (a0)
> > +vaaddu.vv   v8, v8, v16
> > +addia4, a4, -1
> > +vse8.v  v8, (a0)
> > +add a2, a2, a3
> > +add a0, a0, a1
> > +bneza4, 1b
> > +ret
> > +endfunc
> > +.endm
> > +
> > +.irp len, 64, 32, 16, 8, 4
> > +copy_avg \len
> > +.endr
> > diff --git a/libavcodec/riscv/vp9dsp_init.c
> b/libavcodec/riscv/vp9dsp_init.c
> > index ab99294d44..6bfe23563a 100644
> > --- a/libavcodec/riscv/vp9dsp_init.c
> > +++ b/libavcodec/riscv/vp9dsp_init.c
> > @@ -48,6 +48,24 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext
> > *dsp, int bpp) }
> >  # endif
> >
> > +#if HAVE_RVV
> > +if (bpp == 8 && (flags & AV_CPU_FLAG_RVV_I32) &&
> ff_rv_vlen_least(128))
> > { +
> > +#define init_fpel(idx1, sz)   \
> > +dsp->mc[idx1][FILTER_8TAP_SMOOTH ][1][0][0] = ff_avg##sz##_rvv;  \
> > +dsp->mc[idx1][FILTER_8TAP_REGULAR][1][0][0] = ff_avg##sz##_rvv;  \
> > +dsp->mc[idx1][FILTER_8TAP_SHARP  ][1][0][0] = ff_avg##sz##_rvv;  \
> > +dsp->mc[idx1][FILTER_BILINEAR][1][0][0] = f

Re: [FFmpeg-devel] [PATCH v4 1/5] lavc/vp9dsp: R-V V mc avg

2024-05-21 Thread Rémi Denis-Courmont
Le lauantaina 18. toukokuuta 2024, 21.15.29 EEST u...@foxmail.com a écrit :
> From: sunyuechi 
> 
> C908:
> vp9_avg4_8bpp_c: 1.2
> vp9_avg4_8bpp_rvv_i64: 1.0
> vp9_avg8_8bpp_c: 3.7
> vp9_avg8_8bpp_rvv_i64: 1.5
> vp9_avg16_8bpp_c: 14.7
> vp9_avg16_8bpp_rvv_i64: 3.5
> vp9_avg32_8bpp_c: 57.7
> vp9_avg32_8bpp_rvv_i64: 10.0
> vp9_avg64_8bpp_c: 229.0
> vp9_avg64_8bpp_rvv_i64: 31.7
> ---
>  libavcodec/riscv/Makefile  |  3 +-
>  libavcodec/riscv/vp9_mc_rvv.S  | 58 ++
>  libavcodec/riscv/vp9dsp_init.c | 18 +++
>  3 files changed, 78 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/riscv/vp9_mc_rvv.S
> 
> diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> index 27b268ae39..4739d83522 100644
> --- a/libavcodec/riscv/Makefile
> +++ b/libavcodec/riscv/Makefile
> @@ -65,6 +65,7 @@ RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
>  OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
>  RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o \
>   riscv/vp9_mc_rvi.o
> -RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
> +RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o \
> +  riscv/vp9_mc_rvv.o
>  OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
>  RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
> diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
> new file mode 100644
> index 00..7811cd9928
> --- /dev/null
> +++ b/libavcodec/riscv/vp9_mc_rvv.S
> @@ -0,0 +1,58 @@
> +/*
> + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> (ISCAS). + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +.macro vsetvlstatic8 len an maxlen mn=m4

Please put commas between operands.

> +.if \len == 4
> +vsetivlizero, \len, e8, mf4, ta, ma
> +.elseif \len == 8
> +vsetivlizero, \len, e8, mf2, ta, ma
> +.elseif \len == 16
> +vsetivlizero, \len, e8, m1, ta, ma
> +.elseif \len == 32
> +li  \an, \len
> +vsetvli zero, \an, e8, m2, ta, ma
> +.elseif \len == 64
> +li  \an, \maxlen
> +vsetvli zero, \an, e8, \mn, ta, ma
> +.endif
> +.endm
> +
> +.macro copy_avg len
> +func ff_avg\len\()_rvv, zve32x

This should probably be ff_avg_vp9 or something slightly more specific.

> +csrwi   vxrm, 0
> +vsetvlstatic8   \len t0 64
> +1:
> +vle8.v  v8, (a2)
> +vle8.v  v16, (a0)
> +vaaddu.vv   v8, v8, v16
> +addia4, a4, -1
> +vse8.v  v8, (a0)
> +add a2, a2, a3
> +add a0, a0, a1
> +bneza4, 1b
> +ret
> +endfunc
> +.endm
> +
> +.irp len, 64, 32, 16, 8, 4
> +copy_avg \len
> +.endr
> diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
> index ab99294d44..6bfe23563a 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -48,6 +48,24 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext
> *dsp, int bpp) }
>  # endif
> 
> +#if HAVE_RVV
> +if (bpp == 8 && (flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128))
> { +
> +#define init_fpel(idx1, sz)   \
> +dsp->mc[idx1][FILTER_8TAP_SMOOTH ][1][0][0] = ff_avg##sz##_rvv;  \
> +dsp->mc[idx1][FILTER_8TAP_REGULAR][1][0][0] = ff_avg##sz##_rvv;  \
> +dsp->mc[idx1][FILTER_8TAP_SHARP  ][1][0][0] = ff_avg##sz##_rvv;  \
> +dsp->mc[idx1][FILTER_BILINEAR][1][0][0] = ff_avg##sz##_rvv
> +
> +init_fpel(0, 64);
> +init_fpel(1, 32);
> +init_fpel(2, 16);
> +init_fpel(3, 8);
> +init_fpel(4, 4);
> +
> +#undef init_fpel
> +}
> +#endif
>  #endif
>  }


-- 
Rémi Denis-Courmont
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec/dovi_rpudec - correctly read el_bit_depth_minus8 when ext_mapping_idc is non-zero

2024-05-21 Thread Cosmin Stejerean via ffmpeg-devel


> On May 21, 2024, at 3:21 AM, Niklas Haas  wrote:
> 
> On Tue, 21 May 2024 01:17:32 + Cosmin Stejerean via ffmpeg-devel 
>  wrote:
>> From: Cosmin Stejerean 
>> 
>> It looks like the el_bitdepth_minus8 value in the header can also encode
>> ext_mapping_idc in the upper 8 bits.
>> 
>> Samples having a non-zero ext_mapping_idc fail validation currently because 
>> the
>> value returned is out of range. This bypasses this by currently ignoring the
>> ext_mapping_idc and using only the lower 8 bits for el_bitdepth_minus8.
> 
> What is ext_mapping_idc? If it's signalled data that can't be
> reconstructed, we need to store it somewhere into AVDOVIMetadata and
> then re-synthesize it during encoding. Otherwise the RPU transcode will
> be lossy.

I'm not actually sure what it does, but from what I can tell on the current 
samples, it doesn't matter if it ends up being set to 0 in the process of 
transcoding. However, it's not hard to save it and re-synthesize it, so I can 
send a new patch that does that.

- Cosmin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] checkasm: print bench runs when benchmarking

2024-05-21 Thread J. Dekker


Lynne via ffmpeg-devel  writes:

> Helps make sense of the possible noise in the results.
> ---
>  tests/checkasm/checkasm.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index 669f2be9c1..b1c175b95d 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -892,6 +892,9 @@ int main(int argc, char *argv[])
>  fprintf(stderr, "checkasm: using random seed %u\n", seed);
>  av_lfg_init(&checkasm_lfg, seed);
>  
> +if (state.bench_pattern)
> +fprintf(stderr, "checkasm: bench runs %" PRIu64 " (1 << %i)\n", 
> bench_runs, av_log2(bench_runs));
> +
>  check_cpu_flag(NULL, 0);
>  for (i = 0; cpus[i].flag; i++)
>  check_cpu_flag(cpus[i].name, cpus[i].flag);

LGTM.

-- 
jd
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] checkasm: print bench runs when benchmarking

2024-05-21 Thread Lynne via ffmpeg-devel
Helps make sense of the possible noise in the results.
---
 tests/checkasm/checkasm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 669f2be9c1..b1c175b95d 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -892,6 +892,9 @@ int main(int argc, char *argv[])
 fprintf(stderr, "checkasm: using random seed %u\n", seed);
 av_lfg_init(&checkasm_lfg, seed);
 
+if (state.bench_pattern)
+fprintf(stderr, "checkasm: bench runs %" PRIu64 " (1 << %i)\n", 
bench_runs, av_log2(bench_runs));
+
 check_cpu_flag(NULL, 0);
 for (i = 0; cpus[i].flag; i++)
 check_cpu_flag(cpus[i].name, cpus[i].flag);
-- 
2.43.0.381.gb435a96ce8
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v3] checkasm: add sample argument to adjust during bench

2024-05-21 Thread J. Dekker


Lynne via ffmpeg-devel  writes:

> [[PGP Signed Part:Undecided]]
> On 21/05/2024 15:51, J. Dekker wrote:
>> Some timers on certain device and test combinations can produce noisy
>> results, affecting the reliability of performance measurements. One
>> notable example of this is the Canaan K230 RISC-V development board.
>> An option to adjust the number of samples by an exponent (--runs) has
>> been added, allowing developers to increase the sample count for more
>> reliable results.
>> Signed-off-by: J. Dekker 
>> ---
>>   tests/checkasm/checkasm.c | 16 +++-
>>   tests/checkasm/checkasm.h |  7 ---
>>   2 files changed, 19 insertions(+), 4 deletions(-)>
> Tested, works as intended
> LGTM, thanks

Thanks, pushed with fixed commit message.

-- 
jd
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v4 2/2][GSoC 2024] tests/checkasm: Add check_vvc_sad to vvc_mc.c

2024-05-21 Thread Ronald S. Bultje
Hi,

On Sun, May 19, 2024 at 8:55 PM Stone Chen  wrote:

> Adds checkasm for DMVR SAD AVX2 implementation.
>
> Benchmarks ( AMD 7940HS )
> vvc_sad_8x8_c: 70.0
> vvc_sad_8x8_avx2: 10.0
> vvc_sad_16x16_c: 280.0
> vvc_sad_16x16_avx2: 20.0
> vvc_sad_32x32_c: 1020.0
> vvc_sad_32x32_avx2: 70.0
> vvc_sad_64x64_c: 3560.0
> vvc_sad_64x64_avx2: 270.0
> vvc_sad_128x128_c: 13760.0
> vvc_sad_128x128_avx2: 1070.0
> ---
>  tests/checkasm/vvc_mc.c | 38 ++
>  1 file changed, 38 insertions(+)
>

It appears Remi's performance concerns have been addressed separately, so
this patch is good to go.

Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v3] checkasm: add sample argument to adjust during bench

2024-05-21 Thread Lynne via ffmpeg-devel

On 21/05/2024 15:51, J. Dekker wrote:

Some timers on certain device and test combinations can produce noisy
results, affecting the reliability of performance measurements. One
notable example of this is the Canaan K230 RISC-V development board.

An option to adjust the number of samples by an exponent (--runs) has
been added, allowing developers to increase the sample count for more
reliable results.

Signed-off-by: J. Dekker 
---
  tests/checkasm/checkasm.c | 16 +++-
  tests/checkasm/checkasm.h |  7 ---
  2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 31ca9f6e2b..669f2be9c1 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -72,6 +72,9 @@
  void (*checkasm_checked_call)(void *func, int dummy, ...) = 
checkasm_checked_call_novfp;
  #endif
  
+/* Trade-off between speed and accuracy */

+uint64_t bench_runs = 1U << 10;
+
  /* List of tests to invoke */
  static const struct {
  const char *name;
@@ -820,7 +823,7 @@ static void bench_uninit(void)
  static int usage(const char *path)
  {
  fprintf(stderr,
-"Usage: %s [--bench] [--test=] [--verbose] [seed]\n",
+"Usage: %s [--bench] [--runs=] [--test=] [--verbose] 
[seed]\n",
  path);
  return 1;
  }
@@ -867,6 +870,17 @@ int main(int argc, char *argv[])
  state.test_name = arg + 7;
  } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
  state.verbose = 1;
+} else if (!strncmp(arg, "--runs=", 7)) {
+l = strtoul(arg + 7, &end, 10);
+if (*end == '\0') {
+if (l > 30) {
+fprintf(stderr, "checkasm: error: runs exponent must be within the 
range 0 <= 30\n");
+usage(argv[0]);
+}
+bench_runs = 1U << l;
+} else {
+return usage(argv[0]);
+}
  } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX &&
 *end == '\0') {
  seed = l;
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 07fcc751ff..e05053cbf6 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -167,7 +167,7 @@ extern AVLFG checkasm_lfg;
  
  static av_unused void *func_ref, *func_new;
  
-#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */

+extern uint64_t bench_runs;
  
  /* Decide whether or not the specified function needs to be tested */

  #define check_func(func, ...) (checkasm_save_context(), func_ref = 
checkasm_check_func((func_new = func), __VA_ARGS__))
@@ -336,10 +336,11 @@ typedef struct CheckasmPerf {
  av_unused const int sysfd = perf->sysfd;\
  func_type *tfunc = func_new;\
  uint64_t tsum = 0;\
-int ti, tcount = 0;\
+uint64_t ti, tcount = 0;\
  uint64_t t = 0; \
+const uint64_t truns = bench_runs;\
  checkasm_set_signal_handler_state(1);\
-for (ti = 0; ti < BENCH_RUNS; ti++) {\
+for (ti = 0; ti < truns; ti++) {\
  PERF_START(t);\
  tfunc(__VA_ARGS__);\
  tfunc(__VA_ARGS__);\


Tested, works as intended
LGTM, thanks


OpenPGP_0xA2FEA5F03F034464.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] x86/vvc_alf: use the x86inc instruction macros

2024-05-21 Thread James Almer
Let its magic figure out the correct mnemonic based on target instruction set.

Signed-off-by: James Almer 
---
 libavcodec/x86/vvc/vvc_alf.asm | 202 -
 1 file changed, 101 insertions(+), 101 deletions(-)

diff --git a/libavcodec/x86/vvc/vvc_alf.asm b/libavcodec/x86/vvc/vvc_alf.asm
index b3d118962f..71e821c27b 100644
--- a/libavcodec/x86/vvc/vvc_alf.asm
+++ b/libavcodec/x86/vvc/vvc_alf.asm
@@ -73,15 +73,15 @@ SECTION .text
 ;m%2 = 07 06 05 04
 ;m%3 = 11 10 09 08
 
-vshufpd m%5, m%1, m%2, 0011b;06 02 05 01
-vshufpd m%6, m%3, m%5, 1001b;06 10 01 09
+shufpd  m%5, m%1, m%2, 0011b;06 02 05 01
+shufpd  m%6, m%3, m%5, 1001b;06 10 01 09
 
-vshufpd m%1, m%1, m%6, 1100b;06 03 09 00
-vshufpd m%2, m%2, m%6, 0110b;10 07 01 04
-vshufpd m%3, m%3, m%5, 0110b;02 11 05 08
+shufpd  m%1, m%1, m%6, 1100b;06 03 09 00
+shufpd  m%2, m%2, m%6, 0110b;10 07 01 04
+shufpd  m%3, m%3, m%5, 0110b;02 11 05 08
 
 vpermpd m%1, m%1, 0000b ;09 06 03 00
-vshufpd m%2, m%2, m%2, 1001b;10 07 04 01
+shufpd  m%2, m%2, m%2, 1001b;10 07 04 01
 vpermpd m%3, m%3, 1111b ;11 08 05 02
 %endmacro
 
@@ -125,21 +125,21 @@ SECTION .text
 pxor m11, m11
 psubwm11, m12;-clip
 
-vpsubwm9, m2
+psubw m9, m2
 CLIPW m9, m11, m12
 
-vpsubw   m10, m2
+psubwm10, m2
 CLIPWm10, m11, m12
 
-vpunpckhwd   m13, m9, m10
-vpunpcklwdm9, m9, m10
+punpckhwdm13, m9, m10
+punpcklwd m9, m9, m10
 
 pshufb   m12, filters, [param_shuffe_ %+ i]   ;filter
-vpunpcklwd   m10, m12, m12
-vpunpckhwd   m12, m12, m12
+punpcklwdm10, m12, m12
+punpckhwdm12, m12, m12
 
-vpmaddwd  m9, m10
-vpmaddwd m12, m13
+pmaddwd   m9, m10
+pmaddwd  m12, m13
 
 paddd m0, m9
 paddd m1, m12
@@ -268,17 +268,17 @@ SECTION .text
 je %%near_vb
 %endif
 %%no_vb:
-vpsradm0, SHIFT
-vpsradm1, SHIFT
+psrad m0, SHIFT
+psrad m1, SHIFT
 jmp  %%shift_end
 %%near_vb:
 vpbroadcastd  m9, [dd448]
 paddd m0, m9
 paddd m1, m9
-vpsradm0, SHIFT + 3
-vpsradm1, SHIFT + 3
+psrad m0, SHIFT + 3
+psrad m1, SHIFT + 3
 %%shift_end:
-vpackssdw m0, m0, m1
+packssdw  m0, m0, m1
 %endmacro
 
 ; FILTER_VB(line)
@@ -320,7 +320,7 @@ SECTION .text
 %if ps == 2
 movu  %1, %2
 %else
-vpmovzxbw %1, %2
+pmovzxbw  %1, %2
 %endif
 %endmacro
 
@@ -329,7 +329,7 @@ SECTION .text
 %if ps == 2
 movu %1, m%2
 %else
-vpackuswb   m%2, m%2
+packuswbm%2, m%2
 vpermq  m%2, m%2, 0x8
 movu %1, xm%2
 %endif
@@ -489,43 +489,43 @@ cglobal vvc_alf_classify_grad_%1bpc, 6, 14, 16, 
gradient_sum, src, src_stride, w
 LOAD_PIXELS   m6, [s2q + 2 * ps]
 LOAD_PIXELS   m7, [s3q + 2 * ps]
 
-vpblendw  m8, m0, m1, 0xaa ; nw
-vpblendw  m9, m0, m5, 0x55 ; n
-vpblendw m10, m4, m5, 0xaa ; ne
-vpblendw m11, m1, m2, 0xaa ; w
-vpblendw m12, m5, m6, 0xaa ; e
-vpblendw m13, m2, m3, 0xaa ; sw
-vpblendw m14, m2, m7, 0x55 ; s
+pblendw   m8, m0, m1, 0xaa ; nw
+pblendw   m9, m0, m5, 0x55 ; n
+pblendw  m10, m4, m5, 0xaa ; ne
+pblendw  m11, m1, m2, 0xaa ; w
+pblendw  m12, m5, m6, 0xaa ; e
+pblendw  m13, m2, m3, 0xaa ; sw
+pblendw  m14, m2, m7, 0x55 ; s
 
-vpblendw  m0, m1, m6, 0x55
-vpaddwm0, m0   ; c
+pblendw   m0, m1, m6, 0x55
+paddw m0, m0   ; c
 
 movu  m1, [CLASSIFY_SHUFFE]
 pshufbm1, m0, m1   ; d
 
-vpaddwm9, m14  ; n + s
-vpsubwm9, m0   ; (n + s) - c
-vpabswm9, m9   ; ver
+paddw m9, m14  ; n + s
+   

[FFmpeg-devel] [PATCH v3] checkasm: add sample argument to adjust during bench

2024-05-21 Thread J. Dekker
Some timers on certain device and test combinations can produce noisy
results, affecting the reliability of performance measurements. One
notable example of this is the Canaan K230 RISC-V development board.

An option to adjust the number of samples by an exponent (--runs) has
been added, allowing developers to increase the sample count for more
reliable results.

Signed-off-by: J. Dekker 
---
 tests/checkasm/checkasm.c | 16 +++-
 tests/checkasm/checkasm.h |  7 ---
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 31ca9f6e2b..669f2be9c1 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -72,6 +72,9 @@
 void (*checkasm_checked_call)(void *func, int dummy, ...) = 
checkasm_checked_call_novfp;
 #endif
 
+/* Trade-off between speed and accuracy */
+uint64_t bench_runs = 1U << 10;
+
 /* List of tests to invoke */
 static const struct {
 const char *name;
@@ -820,7 +823,7 @@ static void bench_uninit(void)
 static int usage(const char *path)
 {
 fprintf(stderr,
-"Usage: %s [--bench] [--test=] [--verbose] [seed]\n",
+"Usage: %s [--bench] [--runs=] [--test=] 
[--verbose] [seed]\n",
 path);
 return 1;
 }
@@ -867,6 +870,17 @@ int main(int argc, char *argv[])
 state.test_name = arg + 7;
 } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
 state.verbose = 1;
+} else if (!strncmp(arg, "--runs=", 7)) {
+l = strtoul(arg + 7, &end, 10);
+if (*end == '\0') {
+if (l > 30) {
+fprintf(stderr, "checkasm: error: runs exponent must be 
within the range 0 <= 30\n");
+usage(argv[0]);
+}
+bench_runs = 1U << l;
+} else {
+return usage(argv[0]);
+}
 } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX &&
*end == '\0') {
 seed = l;
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 07fcc751ff..e05053cbf6 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -167,7 +167,7 @@ extern AVLFG checkasm_lfg;
 
 static av_unused void *func_ref, *func_new;
 
-#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
+extern uint64_t bench_runs;
 
 /* Decide whether or not the specified function needs to be tested */
 #define check_func(func, ...) (checkasm_save_context(), func_ref = 
checkasm_check_func((func_new = func), __VA_ARGS__))
@@ -336,10 +336,11 @@ typedef struct CheckasmPerf {
 av_unused const int sysfd = perf->sysfd;\
 func_type *tfunc = func_new;\
 uint64_t tsum = 0;\
-int ti, tcount = 0;\
+uint64_t ti, tcount = 0;\
 uint64_t t = 0; \
+const uint64_t truns = bench_runs;\
 checkasm_set_signal_handler_state(1);\
-for (ti = 0; ti < BENCH_RUNS; ti++) {\
+for (ti = 0; ti < truns; ti++) {\
 PERF_START(t);\
 tfunc(__VA_ARGS__);\
 tfunc(__VA_ARGS__);\
-- 
2.42.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2] checkasm: add sample argument to adjust during bench

2024-05-21 Thread Lynne via ffmpeg-devel

On 21/05/2024 14:32, J. Dekker wrote:

Some timers on certain device and test combinations can produce noisy
results, affecting the reliability of performance measurements. One
notable example of this is the Canaan K230 RISC-V development board.

An option to adjust the number of samples (--samples) has been added,
allowing developers to increase or adjust the sample count for more
reliable results.

Signed-off-by: J. Dekker 
---

  Auto-detection can be added later when either a count is omitted or a specific
  value or term such as '0' or 'auto' is provided. This is a development tool,
  the users will be developers primarily working on master who follow checkasm
  changes and/ or add their own tests and functionality; there's no need to
  support a feature like this or deprecate it for years if a better solution
  is submitted.

  tests/checkasm/checkasm.c | 12 +++-
  tests/checkasm/checkasm.h |  5 +++--
  2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 31ca9f6e2b..b8e5cfb9dd 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -72,6 +72,9 @@
  void (*checkasm_checked_call)(void *func, int dummy, ...) = 
checkasm_checked_call_novfp;
  #endif
  
+/* Trade-off between speed and accuracy */

+uint64_t bench_runs = 1000;
+
  /* List of tests to invoke */
  static const struct {
  const char *name;
@@ -820,7 +823,7 @@ static void bench_uninit(void)
  static int usage(const char *path)
  {
  fprintf(stderr,
-"Usage: %s [--bench] [--test=] [--verbose] [seed]\n",
+"Usage: %s [--bench] [--samples=] [--test=] [--verbose] 
[seed]\n",
  path);
  return 1;
  }
@@ -867,6 +870,13 @@ int main(int argc, char *argv[])
  state.test_name = arg + 7;
  } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
  state.verbose = 1;
+} else if (!strncmp(arg, "--samples=", 10)) {
+l = strtoul(arg + 10, &end, 10);
+if (*end == '\0') {
+bench_runs = l;
+} else {
+return usage(argv[0]);
+}
  } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX &&
 *end == '\0') {
  seed = l;
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 07fcc751ff..d6921cc50c 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -167,7 +167,7 @@ extern AVLFG checkasm_lfg;
  
  static av_unused void *func_ref, *func_new;
  
-#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */

+extern uint64_t bench_runs;
  
  /* Decide whether or not the specified function needs to be tested */

  #define check_func(func, ...) (checkasm_save_context(), func_ref = 
checkasm_check_func((func_new = func), __VA_ARGS__))
@@ -338,8 +338,9 @@ typedef struct CheckasmPerf {
  uint64_t tsum = 0;\
  int ti, tcount = 0;\
  uint64_t t = 0; \
+const uint64_t truns = bench_runs;\
  checkasm_set_signal_handler_state(1);\
-for (ti = 0; ti < BENCH_RUNS; ti++) {\
+for (ti = 0; ti < truns; ti++) {\
  PERF_START(t);\
  tfunc(__VA_ARGS__);\
  tfunc(__VA_ARGS__);\


While working on the FFT asm with
https://github.com/cyanreg/lavu_fft_test which has a built-in benchmark, 
I've found that exponentiation works best, as adding more and more 
digits at the end is prone to under/overshoot. For large functions, 1 << 
16 is a good starting point, while for very small functions, 1 << 23 
becomes more optimal.


I suggest replacing --samples with --runs (or --bench-runs, but we're 
all lazy for that), documenting it as "--runs=<exponent>", and rejecting 
anything large enough to overflow.


OpenPGP_0xA2FEA5F03F034464.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2] checkasm: add sample argument to adjust during bench

2024-05-21 Thread Henrik Gramner via ffmpeg-devel
On Tue, May 21, 2024 at 2:33 PM J. Dekker  wrote:
> @@ -338,8 +338,9 @@ typedef struct CheckasmPerf {
>  uint64_t tsum = 0;\
>  int ti, tcount = 0;\
>  uint64_t t = 0; \
> +const uint64_t truns = bench_runs;\
>  checkasm_set_signal_handler_state(1);\
> -for (ti = 0; ti < BENCH_RUNS; ti++) {\
> +for (ti = 0; ti < truns; ti++) {\

This is comparing int with uint64_t. We should probably just use int
for the sample count too.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] checkasm: vvc_alf: Limit benchmarking to a reasonable subset of functions

2024-05-21 Thread Nuo Mi
On Tue, May 21, 2024 at 6:16 PM Martin Storsjö  wrote:

> On Tue, 21 May 2024, Martin Storsjö wrote:
>
> > Don't benchmark every single combination of widths and heights;
> > only benchmark cases which are squares (like in vvc_mc.c).
> >
> > Contrary to vvc_mc, which increases sizes by doubling dimensions,
> > vvc_alf tests all sizes in increments of 4. Limit benchmarking to
> > the cases which are powers of two.
> >
> > This reduces the number of benchmarked cases from 3072 down to 18.
> > ---
> > tests/checkasm/vvc_alf.c | 11 ---
> > 1 file changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
> > index 9526260598..6dd89bfafc 100644
> > --- a/tests/checkasm/vvc_alf.c
> > +++ b/tests/checkasm/vvc_alf.c
> > @@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
> > if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> > fail();
> > }
> > -bench_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> > +// Bench only square sizes, and ones with dimensions
> being a power of two.
> > +if (w == h && (w & (w - 1)) == 0)
> > +bench_new(dst1, dst_stride, src1 + offset,
> src_stride, w, h, filter, clip, vb_pos);
> > }
> > if (check_func(c->alf.filter[CHROMA],
> "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
> > const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
> > @@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
> > if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> > fail();
> > }
> > -bench_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> > +if (w == h && (w & (w - 1)) == 0)
> > +bench_new(dst1, dst_stride, src1 + offset,
> src_stride, w, h, filter, clip, vb_pos);
> > }
> > }
> > }
> > @@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c,
> const int bit_depth)
> > fail();
> > if (memcmp(transpose_idx0, transpose_idx1, id_size))
> > fail();
> > -bench_new(class_idx1, transpose_idx1, src1 + offset,
> stride, w, h, vb_pos, alf_gradient_tmp);
> > +// Bench only square sizes, and ones with dimensions
> being a power of two.
> > +if (w == h && (w & (w - 1)) == 0)
> > +bench_new(class_idx1, transpose_idx1, src1 +
> offset, stride, w, h, vb_pos, alf_gradient_tmp);
>
> Note, these tests (all vvc tests) use check_func("name...%dx%d", w, h) -
> while it's common elsewhere to group them up into slightly bigger bundles,
> e.g. only including the width in the function name, assuming that this is
> the level of granularity of actual assembly implementations - e.g. I don't
> think there would be a different codepath implemented for each block
> height.
>
> And it's possible to convey more information about exactly what failed,
> without needing to encode it into the function name - see the
> checkasm_check functions/macro, and e.g. commit
> 8ff4a4a4f4f73c5e276fa0cbe6cd5a148ebdd4ae.
>
Hi Martin,
Thank you for the suggestion.
Tracked with https://github.com/ffvvc/FFmpeg/issues/226

>
> // Martin
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] checkasm: add sample argument to adjust during bench

2024-05-21 Thread J. Dekker
Some timers on certain device and test combinations can produce noisy
results, affecting the reliability of performance measurements. One
notable example of this is the Canaan K230 RISC-V development board.

An option to adjust the number of samples (--samples) has been added,
allowing developers to increase or adjust the sample count for more
reliable results.

Signed-off-by: J. Dekker 
---

 Auto-detection can be added later when either a count is omitted or a specific
 value or term such as '0' or 'auto' is provided. This is a development tool,
 the users will be developers primarily working on master who follow checkasm
 changes and/ or add their own tests and functionality; there's no need to
 support a feature like this or deprecate it for years if a better solution
 is submitted.

 tests/checkasm/checkasm.c | 12 +++-
 tests/checkasm/checkasm.h |  5 +++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 31ca9f6e2b..b8e5cfb9dd 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -72,6 +72,9 @@
 void (*checkasm_checked_call)(void *func, int dummy, ...) = 
checkasm_checked_call_novfp;
 #endif
 
+/* Trade-off between speed and accuracy */
+uint64_t bench_runs = 1000;
+
 /* List of tests to invoke */
 static const struct {
 const char *name;
@@ -820,7 +823,7 @@ static void bench_uninit(void)
 static int usage(const char *path)
 {
 fprintf(stderr,
-"Usage: %s [--bench] [--test=] [--verbose] [seed]\n",
+"Usage: %s [--bench] [--samples=] [--test=] 
[--verbose] [seed]\n",
 path);
 return 1;
 }
@@ -867,6 +870,13 @@ int main(int argc, char *argv[])
 state.test_name = arg + 7;
 } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
 state.verbose = 1;
+} else if (!strncmp(arg, "--samples=", 10)) {
+l = strtoul(arg + 10, &end, 10);
+if (*end == '\0') {
+bench_runs = l;
+} else {
+return usage(argv[0]);
+}
 } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX &&
*end == '\0') {
 seed = l;
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 07fcc751ff..d6921cc50c 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -167,7 +167,7 @@ extern AVLFG checkasm_lfg;
 
 static av_unused void *func_ref, *func_new;
 
-#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
+extern uint64_t bench_runs;
 
 /* Decide whether or not the specified function needs to be tested */
 #define check_func(func, ...) (checkasm_save_context(), func_ref = 
checkasm_check_func((func_new = func), __VA_ARGS__))
@@ -338,8 +338,9 @@ typedef struct CheckasmPerf {
 uint64_t tsum = 0;\
 int ti, tcount = 0;\
 uint64_t t = 0; \
+const uint64_t truns = bench_runs;\
 checkasm_set_signal_handler_state(1);\
-for (ti = 0; ti < BENCH_RUNS; ti++) {\
+for (ti = 0; ti < truns; ti++) {\
 PERF_START(t);\
 tfunc(__VA_ARGS__);\
 tfunc(__VA_ARGS__);\
-- 
2.42.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 18/18] Changelog: add DVB compatible information for VVC decoder

2024-05-21 Thread Nuo Mi
On Sun, May 19, 2024 at 9:39 PM Nuo Mi  wrote:

> see
> https://dvb.org/specifications/verification-validation/vvc-test-content/
> ---
>  Changelog | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/Changelog b/Changelog
> index dd25715d6b..12770e4296 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -10,6 +10,7 @@ version :
>  - vf_scale supports secondary ref input and framesync options
>  - vf_scale2ref deprecated
>  - qsv_params option added for QSV encoders
> +- VVC decoder compatible with DVB test content
>
Applied.
Thank you.

>
>
>  version 7.0:
> --
> 2.34.1
>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] checkasm: vvc_alf: Limit benchmarking to a reasonable subset of functions

2024-05-21 Thread Nuo Mi
On Tue, May 21, 2024 at 7:11 PM Rémi Denis-Courmont  wrote:

>
>
> Le 21 mai 2024 13:04:29 GMT+03:00, "Martin Storsjö"  a
> écrit :
> >Don't benchmark every single combination of widths and heights;
> >only benchmark cases which are squares (like in vvc_mc.c).
> >
> >Contrary to vvc_mc, which increases sizes by doubling dimensions,
> >vvc_alf tests all sizes in increments of 4. Limit benchmarking to
> >the cases which are powers of two.
> >
> >This reduces the number of benchmarked cases from 3072 down to 18.
> >---
> > tests/checkasm/vvc_alf.c | 11 ---
> > 1 file changed, 8 insertions(+), 3 deletions(-)
> >
> >diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
> >index 9526260598..6dd89bfafc 100644
> >--- a/tests/checkasm/vvc_alf.c
> >+++ b/tests/checkasm/vvc_alf.c
> >@@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
> > if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> > fail();
> > }
> >-bench_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> >+// Bench only square sizes, and ones with dimensions
> being a power of two.
> >+if (w == h && (w & (w - 1)) == 0)
> >+bench_new(dst1, dst_stride, src1 + offset,
> src_stride, w, h, filter, clip, vb_pos);
> > }
> > if (check_func(c->alf.filter[CHROMA],
> "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
> > const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
> >@@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
> > if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> > fail();
> > }
> >-bench_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> >+if (w == h && (w & (w - 1)) == 0)
> >+bench_new(dst1, dst_stride, src1 + offset,
> src_stride, w, h, filter, clip, vb_pos);
> > }
> > }
> > }
> >@@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c,
> const int bit_depth)
> > fail();
> > if (memcmp(transpose_idx0, transpose_idx1, id_size))
> > fail();
> >-bench_new(class_idx1, transpose_idx1, src1 + offset,
> stride, w, h, vb_pos, alf_gradient_tmp);
> >+// Bench only square sizes, and ones with dimensions
> being a power of two.
> >+if (w == h && (w & (w - 1)) == 0)
> >+bench_new(class_idx1, transpose_idx1, src1 + offset,
> stride, w, h, vb_pos, alf_gradient_tmp);
> > }
> > }
> > }
>
> LGTM.
>
Applied.
Thank you, Martin and Remi.

>
> By the way, does anybody know if we could skip benchmarking C functions
> for which zero optimisations are available ? We are not printing the
> benchmark results in that case, so that wouldn't be a loss.
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 3/6] lavf/tls_mbedtls: hook up debug message callback

2024-05-21 Thread Rémi Denis-Courmont


Le 21 mai 2024 13:14:59 GMT+03:00, sfan5  a écrit :
>Am 17.05.24 um 11:51 schrieb Rémi Denis-Courmont:
>> Le 17 mai 2024 11:34:35 GMT+03:00, Sfan5  a écrit :
>>> Signed-off-by: sfan5
>>> ---
>>> libavformat/tls_mbedtls.c | 14 ++
>>> 1 file changed, 14 insertions(+)
>>> 
>>> diff --git a/libavformat/tls_mbedtls.c b/libavformat/tls_mbedtls.c
>>> index 24c3afd94c..9508fe3436 100644
>>> --- a/libavformat/tls_mbedtls.c
>>> +++ b/libavformat/tls_mbedtls.c
>>> @@ -26,6 +26,7 @@
>>> #include 
>>> #include 
>>> #include 
>>> +#include 
>>> #ifdef MBEDTLS_PSA_CRYPTO_C
>>> #include 
>>> #endif
>>> @@ -36,6 +37,7 @@
>>> #include "tls.h"
>>> #include "libavutil/mem.h"
>>> #include "libavutil/parseutils.h"
>>> +#include "libavutil/avstring.h"
>>>   typedef struct TLSContext {
>>>  const AVClass *class;
>>> @@ -112,6 +114,13 @@ static int mbedtls_recv(void *ctx, unsigned char *buf, 
>>> size_t len)
>>>  return handle_transport_error(h, "ffurl_read", 
>>> MBEDTLS_ERR_SSL_WANT_READ, ret);
>>> }
>>> +static void mbedtls_debug(void *ctx, int lvl, const char *file, int line, 
>>> const char *msg)
>>> +{
>>> +URLContext *h = (URLContext*) ctx;
>>> +int av_lvl = lvl >= 4 ? AV_LOG_TRACE : AV_LOG_DEBUG;
>>> +av_log(h, av_lvl, "%s:%d: %s", av_basename(file), line, msg);
>>> +}
>>> +
>>> static void handle_pk_parse_error(URLContext *h, int ret)
>>> {
>>>  switch (ret) {
>>> @@ -201,6 +210,11 @@ static int tls_open(URLContext *h, const char *uri, 
>>> int flags, AVDictionary **op
>>>  mbedtls_x509_crt_init(&tls_ctx->ca_cert);
>>>  mbedtls_pk_init(&tls_ctx->priv_key);
>>> +if (av_log_get_level() >= AV_LOG_DEBUG) {
>>> +mbedtls_ssl_conf_dbg(&tls_ctx->ssl_config, mbedtls_debug, 
>>> shr->tcp);
>>> +mbedtls_debug_set_threshold(4); // maximum
>> This doesn't look thread-safe / reentrant.
>
>Indeed. But what alternative is there?
>
>mbedTLS provides only this mechanism to get debug messages from it.

Either this is made thread-safe, e.g. using an atomic variable inside the 
library, or we can't have that here, as callers will expect thread-safe 
semantics AFAIU.
>
>___
>ffmpeg-devel mailing list
>ffmpeg-devel@ffmpeg.org
>https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>To unsubscribe, visit link above, or email
>ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] checkasm: h264dsp: Avoid out of buffer writes when benchmarking

2024-05-21 Thread Rémi Denis-Courmont


Le 21 mai 2024 14:04:53 GMT+03:00, "Martin Storsjö"  a écrit :
>The loop filters can write before the pointer given to them;
>the actual test invocations correctly used an offset, while
>the benchmark calls were lacking an offset. Therefore, when
>running with benchmarking, these tests could have spurious
>failures.
>---
> tests/checkasm/h264dsp.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
>diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
>index 0cc1f32740..67b8dce53c 100644
>--- a/tests/checkasm/h264dsp.c
>+++ b/tests/checkasm/h264dsp.c
>@@ -362,7 +362,7 @@ static void check_loop_filter(void)
> tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
> fail(); \
> }   \
>-bench_new(dst1, 32, alphas[j], betas[j], tc0[j]);   \
>+bench_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]);\
> }   \
> }   \
> } while (0)
>@@ -421,7 +421,7 @@ static void check_loop_filter_intra(void)
> j, alphas[j], betas[j]);\
> fail(); \
> }   \
>-bench_new(dst1, 32, alphas[j], betas[j]);   \
>+bench_new(dst1 + off, 32, alphas[j], betas[j]); \
> }   \
> }   \
> } while (0)

LGTM
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] checkasm: vvc_alf: Limit benchmarking to a reasonable subset of functions

2024-05-21 Thread Rémi Denis-Courmont


Le 21 mai 2024 13:04:29 GMT+03:00, "Martin Storsjö"  a écrit :
>Don't benchmark every single combination of widths and heights;
>only benchmark cases which are squares (like in vvc_mc.c).
>
>Contrary to vvc_mc, which increases sizes by doubling dimensions,
>vvc_alf tests all sizes in increments of 4. Limit benchmarking to
>the cases which are powers of two.
>
>This reduces the number of benchmarked cases from 3072 down to 18.
>---
> tests/checkasm/vvc_alf.c | 11 ---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
>diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
>index 9526260598..6dd89bfafc 100644
>--- a/tests/checkasm/vvc_alf.c
>+++ b/tests/checkasm/vvc_alf.c
>@@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const int 
>bit_depth)
> if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, 
> w * SIZEOF_PIXEL))
> fail();
> }
>-bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, 
>filter, clip, vb_pos);
>+// Bench only square sizes, and ones with dimensions being a 
>power of two.
>+if (w == h && (w & (w - 1)) == 0)
>+bench_new(dst1, dst_stride, src1 + offset, src_stride, w, 
>h, filter, clip, vb_pos);
> }
> if (check_func(c->alf.filter[CHROMA], 
> "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
> const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
>@@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const int 
>bit_depth)
> if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, 
> w * SIZEOF_PIXEL))
> fail();
> }
>-bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, 
>filter, clip, vb_pos);
>+if (w == h && (w & (w - 1)) == 0)
>+bench_new(dst1, dst_stride, src1 + offset, src_stride, w, 
>h, filter, clip, vb_pos);
> }
> }
> }
>@@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c, const int 
>bit_depth)
> fail();
> if (memcmp(transpose_idx0, transpose_idx1, id_size))
> fail();
>-bench_new(class_idx1, transpose_idx1, src1 + offset, stride, 
>w, h, vb_pos, alf_gradient_tmp);
>+// Bench only square sizes, and ones with dimensions being a 
>power of two.
>+if (w == h && (w & (w - 1)) == 0)
>+bench_new(class_idx1, transpose_idx1, src1 + offset, 
>stride, w, h, vb_pos, alf_gradient_tmp);
> }
> }
> }

LGTM.

By the way, does anybody know if we could skip benchmarking C functions for 
which zero optimisations are available ? We are not printing the benchmark 
results in that case, so that wouldn't be a loss.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] checkasm: h264dsp: Avoid out of buffer writes when benchmarking

2024-05-21 Thread Martin Storsjö
The loop filters can write before the pointer given to them;
the actual test invocations correctly used an offset, while
the benchmark calls were lacking an offset. Therefore, when
running with benchmarking, these tests could have spurious
failures.
---
 tests/checkasm/h264dsp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index 0cc1f32740..67b8dce53c 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -362,7 +362,7 @@ static void check_loop_filter(void)
 tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
 fail(); \
 }   \
-bench_new(dst1, 32, alphas[j], betas[j], tc0[j]);   \
+bench_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]);\
 }   \
 }   \
 } while (0)
@@ -421,7 +421,7 @@ static void check_loop_filter_intra(void)
 j, alphas[j], betas[j]);\
 fail(); \
 }   \
-bench_new(dst1, 32, alphas[j], betas[j]);   \
+bench_new(dst1 + off, 32, alphas[j], betas[j]); \
 }   \
 }   \
 } while (0)
-- 
2.39.3 (Apple Git-146)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v4 11/11] avfilter/vf_dnn_detect: Fix null pointer dereference

2024-05-21 Thread Guo, Yejun



> -Original Message-
> From: ffmpeg-devel  On Behalf Of
> Andreas Rheinhardt
> Sent: Tuesday, May 21, 2024 3:12 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v4 11/11] avfilter/vf_dnn_detect: Fix null
> pointer dereference
> 
> Zhao Zhili:
> > From: Zhao Zhili 
> >
> > Signed-off-by: Zhao Zhili 
> > ---
> >  libavfilter/vf_dnn_detect.c | 10 ++
> >  1 file changed, 6 insertions(+), 4 deletions(-)
> >
> > diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
> > index b4eee06fe7..2a277d4169 100644
> > --- a/libavfilter/vf_dnn_detect.c
> > +++ b/libavfilter/vf_dnn_detect.c
> > @@ -807,11 +807,13 @@ static av_cold void
> dnn_detect_uninit(AVFilterContext *context)
> >  DnnDetectContext *ctx = context->priv;
> >  AVDetectionBBox *bbox;
> >  ff_dnn_uninit(&ctx->dnnctx);
> > -while(av_fifo_can_read(ctx->bboxes_fifo)) {
> > -av_fifo_read(ctx->bboxes_fifo, &bbox, 1);
> > -av_freep(&bbox);
> > +if (ctx->bboxes_fifo) {
> > +while (av_fifo_can_read(ctx->bboxes_fifo)) {
> > +av_fifo_read(ctx->bboxes_fifo, &bbox, 1);
> > +av_freep(&bbox);
> > +}
> > +av_fifo_freep2(&ctx->bboxes_fifo);
> >  }
> > -av_fifo_freep2(&ctx->bboxes_fifo);
> >  av_freep(&ctx->anchors);
> >  free_detect_labels(ctx);
> >  }
> 
> Please apply this patch soon; there is no need to wait for the other patches.
> (I independently stumbled upon this and sent a patch of my own.)
> 
> - Andreas
> 
This patch 11 pushed, thanks.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 4/6] lavf/tls_mbedtls: fix handling of certification validation failures

2024-05-21 Thread sfan5

Am 18.05.24 um 21:53 schrieb Michael Niedermayer:

On Fri, May 17, 2024 at 10:34:41AM +0200, Sfan5 wrote:

We manually check the verification status after the handshake has completed
using mbedtls_ssl_get_verify_result(). However with VERIFY_REQUIRED
mbedtls_ssl_handshake() already returns an error, so this code is never
reached.
Fix that by using VERIFY_OPTIONAL, which performs the verification but
does not abort the handshake.

Signed-off-by: sfan5 
---
  libavformat/tls_mbedtls.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavformat/tls_mbedtls.c b/libavformat/tls_mbedtls.c
index 9508fe3436..67d5c568b9 100644
--- a/libavformat/tls_mbedtls.c
+++ b/libavformat/tls_mbedtls.c
@@ -263,8 +263,9 @@ static int tls_open(URLContext *h, const char *uri, int
flags, AVDictionary **op
  goto fail;
  }
  +// not VERIFY_REQUIRED because we manually check after handshake
  mbedtls_ssl_conf_authmode(&tls_ctx->ssl_config,
-  shr->verify ? MBEDTLS_SSL_VERIFY_REQUIRED :
MBEDTLS_SSL_VERIFY_NONE);
+  shr->verify ? MBEDTLS_SSL_VERIFY_OPTIONAL :
MBEDTLS_SSL_VERIFY_NONE);
  mbedtls_ssl_conf_rng(&tls_ctx->ssl_config, mbedtls_ctr_drbg_random,
&tls_ctx->ctr_drbg_context);
  mbedtls_ssl_conf_ca_chain(&tls_ctx->ssl_config, &tls_ctx->ca_cert,
NULL);

This patch looks corrupted by extra line breaks

[...]


Thanks for pointing that out.

It looks like years later Microsoft is still incapable of leaving 
patches intact... Will send as attachments for v2.


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/6] lavf/tls_mbedtls: handle more error codes for human-readable message

2024-05-21 Thread sfan5

Am 17.05.24 um 11:42 schrieb Andrew Sayers:

On Fri, May 17, 2024 at 10:34:26AM +0200, Sfan5 wrote:

Signed-off-by: sfan5 
---
  libavformat/tls_mbedtls.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/libavformat/tls_mbedtls.c b/libavformat/tls_mbedtls.c
index 1a182e735e..fd6ba0b1f5 100644
--- a/libavformat/tls_mbedtls.c
+++ b/libavformat/tls_mbedtls.c
@@ -138,6 +138,9 @@ static void handle_handshake_error(URLContext *h, int
ret)
  case MBEDTLS_ERR_SSL_HANDSHAKE_FAILURE:
  av_log(h, AV_LOG_ERROR, "TLS handshake failed.\n");
  break;
+case MBEDTLS_ERR_SSL_BAD_PROTOCOL_VERSION:
+av_log(h, AV_LOG_ERROR, "TLS protocol version mismatches.\n");

"... mismatch" or "... does not match" would be more readable than "mismatches".

The word "matches" can mean either "does match" or "plural of match".
It's technically valid to use "mismatches" to mean "does not match",
but in practice the word is only ever used to mean "plural of mismatch".


Alright. Will change for v2.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec/dovi_rpudec - correctly read el_bit_depth_minus8 when ext_mapping_idc is non-zero

2024-05-21 Thread Niklas Haas
On Tue, 21 May 2024 01:17:32 + Cosmin Stejerean via ffmpeg-devel 
 wrote:
> From: Cosmin Stejerean 
> 
> It looks like the el_bitdepth_minus8 value in the header can also encode
> ext_mapping_idc in the upper 8 bits.
> 
> Samples having a non-zero ext_mapping_idc fail validation currently because 
> the
> value returned is out of range. This bypasses this by currently ignoring the
> ext_mapping_idc and using only the lower 8 bits for el_bitdepth_minus8.

What is ext_mapping_idc? If it's signalled data that can't be
reconstructed, we need to store it somewhere into AVDOVIMetadata and
then re-synthesize it during encoding. Otherwise the RPU transcode will
be lossy.

> 
> ---
>  libavcodec/dovi_rpudec.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/libavcodec/dovi_rpudec.c b/libavcodec/dovi_rpudec.c
> index 7c7eda9d09..1b11ad3640 100644
> --- a/libavcodec/dovi_rpudec.c
> +++ b/libavcodec/dovi_rpudec.c
> @@ -411,7 +411,9 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, 
> size_t rpu_size,
>  
>  if ((hdr->rpu_format & 0x700) == 0) {
>  int bl_bit_depth_minus8 = get_ue_golomb_31(gb);
> -int el_bit_depth_minus8 = get_ue_golomb_31(gb);
> +// this can encode a two byte value, with higher byte being 
> ext_mapping_idc
> +// use only the lower byte for el_bit_depth_minus8
> +uint8_t el_bit_depth_minus8 = get_ue_golomb_long(gb) & 0xFF;
>  int vdr_bit_depth_minus8 = get_ue_golomb_31(gb);
>  VALIDATE(bl_bit_depth_minus8, 0, 8);
>  VALIDATE(el_bit_depth_minus8, 0, 8);
> -- 
> 2.42.1
> 
> 
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec/dovi - disable metadata compression by default

2024-05-21 Thread Niklas Haas
On Tue, 21 May 2024 04:03:43 + Cosmin Stejerean via ffmpeg-devel 
 wrote:
> From: Cosmin Stejerean 
> 
> not all clients support metadata compression, make this an option and off by
> default until we can verify output.
> 
> vdr_dm_metadata_changed = 0 case fails the DV verifier so force this to true
> for now until we can determine the correct output format for this case.

This approach seems reasonable to me, at least until we get those specs.

>
> 
> ---
>  libavcodec/dovi_rpu.h| 5 +
>  libavcodec/dovi_rpuenc.c | 8 ++--
>  libavcodec/libaomenc.c   | 1 +
>  libavcodec/libsvtav1.c   | 1 +
>  libavcodec/libx265.c | 1 +
>  5 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/dovi_rpu.h b/libavcodec/dovi_rpu.h
> index 8ce0c88e9d..fca30804ae 100644
> --- a/libavcodec/dovi_rpu.h
> +++ b/libavcodec/dovi_rpu.h
> @@ -71,6 +71,11 @@ typedef struct DOVIContext {
>  AVDOVIDmData *ext_blocks;
>  int num_ext_blocks;
>  
> +/**
> + * Enable metadata compression in the output. Currently this is 
> experimental.
> + */
> +int enable_compression;
> +
>  /**
>   * Private fields internal to dovi_rpu.c
>   */
> diff --git a/libavcodec/dovi_rpuenc.c b/libavcodec/dovi_rpuenc.c
> index 3c3e0f84c0..26ed25733a 100644
> --- a/libavcodec/dovi_rpuenc.c
> +++ b/libavcodec/dovi_rpuenc.c
> @@ -512,8 +512,12 @@ int ff_dovi_rpu_generate(DOVIContext *s, const 
> AVDOVIMetadata *metadata,
>  }
>  }
>  
> -vdr_dm_metadata_changed = !s->color || memcmp(s->color, color, 
> sizeof(*color));
> -use_prev_vdr_rpu = !memcmp(&s->vdr[vdr_rpu_id]->mapping, mapping, 
> sizeof(*mapping));
> +// the output when vdr_dm_metadata_changed is 0 fails the DV verifier
> +// force it to 1 until we can get some samples or documentation on 
> correct syntax
> +vdr_dm_metadata_changed = 1; // !s->color || memcmp(s->color, color, 
> sizeof(*color));
> +
> +// not all clients support metadata compression
> +use_prev_vdr_rpu = s->enable_compression && 
> !memcmp(&s->vdr[vdr_rpu_id]->mapping, mapping, sizeof(*mapping));
>  
>  buffer_size = 12 /* vdr seq info */ + 5 /* CRC32 + terminator */;
>  buffer_size += num_ext_blocks_v1 * 13;
> diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
> index dec74ebecd..c6104f5522 100644
> --- a/libavcodec/libaomenc.c
> +++ b/libavcodec/libaomenc.c
> @@ -1489,6 +1489,7 @@ static const AVOption options[] = {
>  { "still-picture", "Encode in single frame mode (typically used for 
> still AVIF images).", OFFSET(still_picture), AV_OPT_TYPE_BOOL, {.i64 = 0}, 
> -1, 1, VE },
>  { "dolbyvision", "Enable Dolby Vision RPU coding", 
> OFFSET(dovi.enable), AV_OPT_TYPE_BOOL, {.i64 = FF_DOVI_AUTOMATIC }, -1, 1, 
> VE, .unit = "dovi" },
>  {   "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DOVI_AUTOMATIC}, 
> .flags = VE, .unit = "dovi" },
> +{ "dv_enable_compression", "Enable Dolby Vision metadata compression", 
> OFFSET(dovi.enable_compression), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, VE },
>  { "enable-rect-partitions", "Enable rectangular partitions", 
> OFFSET(enable_rect_partitions), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
>  { "enable-1to4-partitions", "Enable 1:4/4:1 partitions", 
> OFFSET(enable_1to4_partitions), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
>  { "enable-ab-partitions",   "Enable ab shape partitions",
> OFFSET(enable_ab_partitions),   AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
> diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
> index 2fef8c8971..86bb6686dd 100644
> --- a/libavcodec/libsvtav1.c
> +++ b/libavcodec/libsvtav1.c
> @@ -733,6 +733,7 @@ static const AVOption options[] = {
>  
>  { "dolbyvision", "Enable Dolby Vision RPU coding", OFFSET(dovi.enable), 
> AV_OPT_TYPE_BOOL, {.i64 = FF_DOVI_AUTOMATIC }, -1, 1, VE, .unit = "dovi" },
>  {   "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DOVI_AUTOMATIC}, 
> .flags = VE, .unit = "dovi" },
> +{ "dv_enable_compression", "Enable Dolby Vision metadata compression", 
> OFFSET(dovi.enable_compression), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, VE },
>  
>  {NULL},
>  };
> diff --git a/libavcodec/libx265.c b/libavcodec/libx265.c
> index ac1dbc4f97..2a79a5e6da 100644
> --- a/libavcodec/libx265.c
> +++ b/libavcodec/libx265.c
> @@ -953,6 +953,7 @@ static const AVOption options[] = {
>  #if X265_BUILD >= 167
>  { "dolbyvision", "Enable Dolby Vision RPU coding", OFFSET(dovi.enable), 
> AV_OPT_TYPE_BOOL, {.i64 = FF_DOVI_AUTOMATIC }, -1, 1, VE, .unit = "dovi" },
>  {   "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DOVI_AUTOMATIC}, 
> .flags = VE, .unit = "dovi" },
> +{ "dv_enable_compression", "Enable Dolby Vision metadata compression", 
> OFFSET(dovi.enable_compression), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, VE },
>  #endif

Setting up an extra AVClass here seems more hassle than it's worth, but
maybe we could at least hide these options behind a preprocessor
definition so t

Re: [FFmpeg-devel] [PATCH] checkasm: vvc_alf: Limit benchmarking to a reasonable subset of functions

2024-05-21 Thread Martin Storsjö

On Tue, 21 May 2024, Martin Storsjö wrote:


Don't benchmark every single combination of widths and heights;
only benchmark cases which are squares (like in vvc_mc.c).

Contrary to vvc_mc, which increases sizes by doubling dimensions,
vvc_alf tests all sizes in increments of 4. Limit benchmarking to
the cases which are powers of two.

This reduces the number of benchmarked cases from 3072 down to 18.
---
tests/checkasm/vvc_alf.c | 11 ---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
index 9526260598..6dd89bfafc 100644
--- a/tests/checkasm/vvc_alf.c
+++ b/tests/checkasm/vvc_alf.c
@@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w 
* SIZEOF_PIXEL))
fail();
}
-bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, 
filter, clip, vb_pos);
+// Bench only square sizes, and ones with dimensions being a 
power of two.
+if (w == h && (w & (w - 1)) == 0)
+bench_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
}
if (check_func(c->alf.filter[CHROMA], 
"vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
@@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w 
* SIZEOF_PIXEL))
fail();
}
-bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, 
filter, clip, vb_pos);
+if (w == h && (w & (w - 1)) == 0)
+bench_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
}
}
}
@@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c, const int 
bit_depth)
fail();
if (memcmp(transpose_idx0, transpose_idx1, id_size))
fail();
-bench_new(class_idx1, transpose_idx1, src1 + offset, stride, 
w, h, vb_pos, alf_gradient_tmp);
+// Bench only square sizes, and ones with dimensions being a 
power of two.
+if (w == h && (w & (w - 1)) == 0)
+bench_new(class_idx1, transpose_idx1, src1 + offset, 
stride, w, h, vb_pos, alf_gradient_tmp);


Note, these tests (all vvc tests) use check_func("name...%dx%d", w, h) - 
while it's common elsewhere to group them up into slightly bigger bundles, 
e.g. only including the width in the function name, assuming that this is 
the level of granularity of actual assembly implementations - e.g. I don't 
think there would be a different codepath implemented for each block 
height.


And it's possible to convey more information about exactly what failed, 
without needing to encode it into the function name - see the 
checkasm_check functions/macro, and e.g. commit 
8ff4a4a4f4f73c5e276fa0cbe6cd5a148ebdd4ae.


// Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 3/6] lavf/tls_mbedtls: hook up debug message callback

2024-05-21 Thread sfan5

Am 17.05.24 um 11:51 schrieb Rémi Denis-Courmont:

Le 17 mai 2024 11:34:35 GMT+03:00, Sfan5  a écrit :

Signed-off-by: sfan5
---
libavformat/tls_mbedtls.c | 14 ++
1 file changed, 14 insertions(+)

diff --git a/libavformat/tls_mbedtls.c b/libavformat/tls_mbedtls.c
index 24c3afd94c..9508fe3436 100644
--- a/libavformat/tls_mbedtls.c
+++ b/libavformat/tls_mbedtls.c
@@ -26,6 +26,7 @@
#include 
#include 
#include 
+#include 
#ifdef MBEDTLS_PSA_CRYPTO_C
#include 
#endif
@@ -36,6 +37,7 @@
#include "tls.h"
#include "libavutil/mem.h"
#include "libavutil/parseutils.h"
+#include "libavutil/avstring.h"
  typedef struct TLSContext {
 const AVClass *class;
@@ -112,6 +114,13 @@ static int mbedtls_recv(void *ctx, unsigned char *buf, 
size_t len)
 return handle_transport_error(h, "ffurl_read", MBEDTLS_ERR_SSL_WANT_READ, 
ret);
}
+static void mbedtls_debug(void *ctx, int lvl, const char *file, int line, 
const char *msg)
+{
+URLContext *h = (URLContext*) ctx;
+int av_lvl = lvl >= 4 ? AV_LOG_TRACE : AV_LOG_DEBUG;
+av_log(h, av_lvl, "%s:%d: %s", av_basename(file), line, msg);
+}
+
static void handle_pk_parse_error(URLContext *h, int ret)
{
 switch (ret) {
@@ -201,6 +210,11 @@ static int tls_open(URLContext *h, const char *uri, int 
flags, AVDictionary **op
 mbedtls_x509_crt_init(&tls_ctx->ca_cert);
 mbedtls_pk_init(&tls_ctx->priv_key);
+if (av_log_get_level() >= AV_LOG_DEBUG) {
+mbedtls_ssl_conf_dbg(&tls_ctx->ssl_config, mbedtls_debug, shr->tcp);
+mbedtls_debug_set_threshold(4); // maximum

This doesn't look thread-safe / reentrant.


Indeed. But what alternative is there?

mbedTLS provides only this mechanism to get debug messages from it.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v4 2/2][GSoC 2024] tests/checkasm: Add check_vvc_sad to vvc_mc.c

2024-05-21 Thread Martin Storsjö

On Tue, 21 May 2024, Rémi Denis-Courmont wrote:




Le 21 mai 2024 09:37:18 GMT+03:00, "Martin Storsjö"  a écrit :

On Tue, 21 May 2024, Rémi Denis-Courmont wrote:


Hi,

VVC benchmarks have increased checkasm runtime by at least an order of 
magnitude. It's become so prohibitively slow that I could not even get 
to the end.


This is not an acceptable situation and impedes non-VVC assembler work


I don't quite understand; whenever benchmarking anything in checkasm, I 
would always run e.g. "checkasm --test=ac3dsp 
--bench=ac3_sum_square_bufferfly_float", limiting the total running of 
tests to a specific module, and only benchmarking a subset of the run 
functions. (The --bench parameter specifies a prefix; only functions 
matching that prefix get benchmarked.)


Sure that's how you do it when you're working on a specific new 
optimisation. Now we're trying to compare 128-bit and 256-bit vectors 
for *all* existing functions to see which ones need to be reworked.


That used to work (in 30 minutes on K230, 5 minutes on Zen 2, IIRC). Now 
it's effectively broken and that's not acceptable.


Ah, I see. Ok, that's a reasonable thing to do I guess.

(It's of course possible to speed it up further by only testing specific 
--test=foo cases where you know you have riscv assembly worth 
benchmarking, but if it was doable in a tolerable amount of time before, 
that shouldn't be needed.)


Without limiting the scope with a --test parameter, checkasm 
benchmarking has always been prohibitively slow for me - so I don't 
think there's anything new here?


As said, it seems to be literally an order of magnitude slower than 
before if not worse.


That said I'm not familiar with the VVC tests in checkasm, perhaps they 
benchmark things excessively. But I don't see how that would impede 
work on other DSP functions in any way?


James also complained about the same thing before I did.


Indeed, the tests in vvc_alf group seem to do excessive benchmarking 
(benchmarking every width/height combination between 4 and 128, in 
increments of 4). I sent a patch to cut this down to a reasonable amount.


Overall, I would expect the vvc checkasm tests to take a notable amount of 
time. Dav1d's checkasm takes twice as long to run as ffmpeg's, and it's 
probably reasonable to assume that vvc is roughly of the same level of 
complexity as av1, so it's probably expected that ffmpeg's checkasm 
runtime at least doubles, once all vvc routines are integrated in 
checkasm.


But the tests in vvc_alf indeed had an entirely unreasonable amount of 
benchmarking hooked up, and that should indeed be fixed, e.g. with the 
patch I just sent.


// Martin

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] checkasm: vvc_alf: Limit benchmarking to a reasonable subset of functions

2024-05-21 Thread Martin Storsjö
Don't benchmark every single combination of widths and heights;
only benchmark cases which are squares (like in vvc_mc.c).

Contrary to vvc_mc, which increases sizes by doubling dimensions,
vvc_alf tests all sizes in increments of 4. Limit benchmarking to
the cases which are powers of two.

This reduces the number of benchmarked cases from 3072 down to 18.
---
 tests/checkasm/vvc_alf.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
index 9526260598..6dd89bfafc 100644
--- a/tests/checkasm/vvc_alf.c
+++ b/tests/checkasm/vvc_alf.c
@@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
 if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w 
* SIZEOF_PIXEL))
 fail();
 }
-bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, 
filter, clip, vb_pos);
+// Bench only square sizes, and ones with dimensions being a 
power of two.
+if (w == h && (w & (w - 1)) == 0)
+bench_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
 }
 if (check_func(c->alf.filter[CHROMA], 
"vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
 const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
@@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
 if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w 
* SIZEOF_PIXEL))
 fail();
 }
-bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, 
filter, clip, vb_pos);
+if (w == h && (w & (w - 1)) == 0)
+bench_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
 }
 }
 }
@@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c, const int 
bit_depth)
 fail();
 if (memcmp(transpose_idx0, transpose_idx1, id_size))
 fail();
-bench_new(class_idx1, transpose_idx1, src1 + offset, stride, 
w, h, vb_pos, alf_gradient_tmp);
+// Bench only square sizes, and ones with dimensions being a 
power of two.
+if (w == h && (w & (w - 1)) == 0)
+bench_new(class_idx1, transpose_idx1, src1 + offset, 
stride, w, h, vb_pos, alf_gradient_tmp);
 }
 }
 }
-- 
2.39.3 (Apple Git-146)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


  1   2   >