[FFmpeg-cvslog] avcodec/mfenc: Dynamically load MFPlat.DLL

2022-05-25 Thread Trystan Mata
ffmpeg | branch: master | Trystan Mata  | Wed May 25 
12:54:01 2022 +0200| [1cb601ad10313981209a5918fc36a968068fc0ec] | committer: 
Martin Storsjö

avcodec/mfenc: Dynamically load MFPlat.DLL

Allows non-UWP builds of FFmpeg with MediaFoundation to work on
N editions of Windows which are without MediaFoundation by default.

On UWP target, FFmpeg is linked directly against MediaFoundation since
LoadLibrary is not available.

This commit addresses https://trac.ffmpeg.org/ticket/9788

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1cb601ad10313981209a5918fc36a968068fc0ec
---

 configure |  4 ++-
 libavcodec/mf_utils.c | 59 
 libavcodec/mf_utils.h | 35 ---
 libavcodec/mfenc.c| 93 ++-
 4 files changed, 124 insertions(+), 67 deletions(-)

diff --git a/configure b/configure
index 2337f0a8f2..6cf7d89674 100755
--- a/configure
+++ b/configure
@@ -3130,7 +3130,6 @@ wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
 
 # hardware-accelerated codecs
 mediafoundation_deps="mftransform_h MFCreateAlignedMemoryBuffer"
-mediafoundation_extralibs="-lmfplat -lmfuuid -lole32 -lstrmiids"
 omx_deps="libdl pthreads"
 omx_rpi_select="omx"
 qsv_deps="libmfx"
@@ -6879,6 +6878,9 @@ test_cpp MFCreateAlignedMemoryBuffer(size, align - 1, );
 if (FAILED(hr))
 return NULL;
 
@@ -548,7 +516,7 @@ const CLSID *ff_codec_to_mf_subtype(enum AVCodecID codec)
 }
 }
 
-static int init_com_mf(void *log)
+static int init_com_mf(void *log, MFFunctions *f)
 {
 HRESULT hr;
 
@@ -561,7 +529,7 @@ static int init_com_mf(void *log)
 return AVERROR(ENOSYS);
 }
 
-hr = MFStartup(MF_VERSION, MFSTARTUP_FULL);
+hr = f->MFStartup(MF_VERSION, MFSTARTUP_FULL);
 if (FAILED(hr)) {
 av_log(log, AV_LOG_ERROR, "could not initialize MediaFoundation\n");
 CoUninitialize();
@@ -571,15 +539,16 @@ static int init_com_mf(void *log)
 return 0;
 }
 
-static void uninit_com_mf(void)
+static void uninit_com_mf(MFFunctions *f)
 {
-MFShutdown();
+f->MFShutdown();
 CoUninitialize();
 }
 
 // Find and create a IMFTransform with the given input/output types. When done,
 // you should use ff_free_mf() to destroy it, which will also uninit COM.
 int ff_instantiate_mf(void *log,
+  MFFunctions *f,
   GUID category,
   MFT_REGISTER_TYPE_INFO *in_type,
   MFT_REGISTER_TYPE_INFO *out_type,
@@ -594,7 +563,7 @@ int ff_instantiate_mf(void *log,
 IMFActivate *winner = 0;
 UINT32 flags;
 
-ret = init_com_mf(log);
+ret = init_com_mf(log, f);
 if (ret < 0)
 return ret;
 
@@ -606,7 +575,7 @@ int ff_instantiate_mf(void *log,
 flags |= MFT_ENUM_FLAG_SYNCMFT;
 }
 
-hr = ff_MFTEnumEx(category, flags, in_type, out_type, ,
+hr = f->MFTEnumEx(category, flags, in_type, out_type, ,
   _activate);
 if (FAILED(hr))
 goto error_uninit_mf;
@@ -667,14 +636,14 @@ int ff_instantiate_mf(void *log,
 return 0;
 
 error_uninit_mf:
-uninit_com_mf();
+uninit_com_mf(f);
 return AVERROR(ENOSYS);
 }
 
-void ff_free_mf(IMFTransform **mft)
+void ff_free_mf(MFFunctions *f, IMFTransform **mft)
 {
 if (*mft)
 IMFTransform_Release(*mft);
 *mft = NULL;
-uninit_com_mf();
+uninit_com_mf(f);
 }
diff --git a/libavcodec/mf_utils.h b/libavcodec/mf_utils.h
index d514723c3b..3b12344f3e 100644
--- a/libavcodec/mf_utils.h
+++ b/libavcodec/mf_utils.h
@@ -41,6 +41,25 @@
 
 #include "avcodec.h"
 
+// Windows N editions does not provide MediaFoundation by default.
+// So to avoid DLL loading error, MediaFoundation will be dynamically loaded
+// except on UWP build since LoadLibrary is not available on it.
+typedef struct MFFunctions {
+HRESULT (WINAPI *MFStartup) (ULONG Version, DWORD dwFlags);
+HRESULT (WINAPI *MFShutdown) (void);
+HRESULT (WINAPI *MFCreateAlignedMemoryBuffer) (DWORD cbMaxLength,
+   DWORD cbAligment,
+   IMFMediaBuffer **ppBuffer);
+HRESULT (WINAPI *MFCreateSample) (IMFSample **ppIMFSample);
+HRESULT (WINAPI *MFCreateMediaType) (IMFMediaType **ppMFType);
+// MFTEnumEx is missing in Windows Vista's mfplat.dll.
+HRESULT (WINAPI *MFTEnumEx)(GUID guidCategory, UINT32 Flags,
+const MFT_REGISTER_TYPE_INFO *pInputType,
+const MFT_REGISTER_TYPE_INFO *pOutputType,
+IMFActivate ***pppMFTActivate,
+UINT32 *pnumMFTActivate);
+} MFFunctions;
+
 // These 

[FFmpeg-cvslog] qsv: add requirement for the minimal version of libmfx

2022-05-25 Thread Haihao Xiang
ffmpeg | branch: master | Haihao Xiang  | 
Sun May 22 20:19:11 2022 +0800| [478e1a98a289bbc777bddc02fdcefeaa3c416a63] | 
committer: Haihao Xiang

qsv: add requirement for the minimal version of libmfx

libmfx 1.28 was released 3 years ago, so it is easy to get version 1.28
or newer. Adding the minimal version requirement to the configure script
lets us remove lots of compile-time checks.

Reviewed-by: softworkz 
Signed-off-by: Jean-Baptiste Kempf 
Signed-off-by: Haihao Xiang 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=478e1a98a289bbc777bddc02fdcefeaa3c416a63
---

 configure  |   7 +-
 libavcodec/qsv.c   |  24 ---
 libavcodec/qsvenc.c| 476 -
 libavcodec/qsvenc.h|  51 +
 libavcodec/qsvenc_h264.c   |   6 -
 libavcodec/qsvenc_hevc.c   |  10 -
 libavfilter/vf_scale_qsv.c |  13 +-
 libavfilter/vf_vpp_qsv.c   | 143 +++---
 libavutil/hwcontext_qsv.c  |   2 -
 9 files changed, 250 insertions(+), 482 deletions(-)

diff --git a/configure b/configure
index f115b21064..2337f0a8f2 100755
--- a/configure
+++ b/configure
@@ -6566,8 +6566,11 @@ enabled liblensfun&& require_pkg_config 
liblensfun lensfun lensfun.h lf_
 # Media SDK or Intel Media Server Studio, these don't come with
 # pkg-config support.  Instead, users should make sure that the build
 # can find the libraries and headers through other means.
-enabled libmfx&& { check_pkg_config libmfx libmfx "mfx/mfxvideo.h" 
MFXInit ||
-   { require libmfx "mfx/mfxvideo.h" MFXInit 
"-llibmfx $advapi32_extralibs" && warn "using libmfx without pkg-config"; } }
+enabled libmfx&& { check_pkg_config libmfx "mfx >= 1.28" 
"mfx/mfxvideo.h" MFXInit ||
+   { require libmfx "mfx/mfxvideo.h mfx/mfxdefs.h" 
MFXInit "-llibmfx $advapi32_extralibs" &&
+ { test_cpp_condition mfx/mfxdefs.h 
"MFX_VERSION >= 1028" || die "ERROR: libmfx version must be >= 1.28"; }  &&
+ warn "using libmfx without pkg-config"; } }
+
 if enabled libmfx; then
check_cc MFX_CODEC_VP9 "mfx/mfxvp9.h mfx/mfxstructures.h" "MFX_CODEC_VP9"
 fi
diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c
index b86c20b153..385b43bb6c 100644
--- a/libavcodec/qsv.c
+++ b/libavcodec/qsv.c
@@ -38,34 +38,26 @@
 
 #define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl))
 
-#if QSV_VERSION_ATLEAST(1, 12)
 #include "mfx/mfxvp8.h"
-#endif
 
 int ff_qsv_codec_id_to_mfx(enum AVCodecID codec_id)
 {
 switch (codec_id) {
 case AV_CODEC_ID_H264:
 return MFX_CODEC_AVC;
-#if QSV_VERSION_ATLEAST(1, 8)
 case AV_CODEC_ID_HEVC:
 return MFX_CODEC_HEVC;
-#endif
 case AV_CODEC_ID_MPEG1VIDEO:
 case AV_CODEC_ID_MPEG2VIDEO:
 return MFX_CODEC_MPEG2;
 case AV_CODEC_ID_VC1:
 return MFX_CODEC_VC1;
-#if QSV_VERSION_ATLEAST(1, 12)
 case AV_CODEC_ID_VP8:
 return MFX_CODEC_VP8;
-#endif
 case AV_CODEC_ID_MJPEG:
 return MFX_CODEC_JPEG;
-#if QSV_VERSION_ATLEAST(1, 19)
 case AV_CODEC_ID_VP9:
 return MFX_CODEC_VP9;
-#endif
 #if QSV_VERSION_ATLEAST(1, 34)
 case AV_CODEC_ID_AV1:
 return MFX_CODEC_AV1;
@@ -189,17 +181,11 @@ enum AVPixelFormat ff_qsv_map_fourcc(uint32_t fourcc)
 case MFX_FOURCC_NV12: return AV_PIX_FMT_NV12;
 case MFX_FOURCC_P010: return AV_PIX_FMT_P010;
 case MFX_FOURCC_P8:   return AV_PIX_FMT_PAL8;
-#if QSV_VERSION_ATLEAST(1, 9)
 case MFX_FOURCC_A2RGB10: return AV_PIX_FMT_X2RGB10;
-#endif
-#if QSV_VERSION_ATLEAST(1, 17)
 case MFX_FOURCC_RGB4: return AV_PIX_FMT_BGRA;
-#endif
 #if CONFIG_VAAPI
 case MFX_FOURCC_YUY2: return AV_PIX_FMT_YUYV422;
-#if QSV_VERSION_ATLEAST(1, 27)
 case MFX_FOURCC_Y210: return AV_PIX_FMT_Y210;
-#endif
 #endif
 }
 return AV_PIX_FMT_NONE;
@@ -217,27 +203,21 @@ int ff_qsv_map_pixfmt(enum AVPixelFormat format, uint32_t 
*fourcc)
 case AV_PIX_FMT_P010:
 *fourcc = MFX_FOURCC_P010;
 return AV_PIX_FMT_P010;
-#if QSV_VERSION_ATLEAST(1, 9)
 case AV_PIX_FMT_X2RGB10:
 *fourcc = MFX_FOURCC_A2RGB10;
 return AV_PIX_FMT_X2RGB10;
-#endif
-#if QSV_VERSION_ATLEAST(1, 17)
 case AV_PIX_FMT_BGRA:
 *fourcc = MFX_FOURCC_RGB4;
 return AV_PIX_FMT_BGRA;
-#endif
 #if CONFIG_VAAPI
 case AV_PIX_FMT_YUV422P:
 case AV_PIX_FMT_YUYV422:
 *fourcc = MFX_FOURCC_YUY2;
 return AV_PIX_FMT_YUYV422;
-#if QSV_VERSION_ATLEAST(1, 27)
 case AV_PIX_FMT_YUV422P10:
 case AV_PIX_FMT_Y210:
 *fourcc = MFX_FOURCC_Y210;
 return AV_PIX_FMT_Y210;
-#endif
 #endif
 default:
 return AVERROR(ENOSYS);
@@ -438,9 +418,7 @@ int ff_qsv_init_internal_session(AVCodecContext *avctx, 
QSVSession *qs,
 const char *desc;
 int ret;
 
-#if QSV_VERSION_ATLEAST(1, 16)
 init_par.GPUCopy= gpu_copy;
-#endif
 init_par.Implementation = impl;
 

[FFmpeg-cvslog] libavcodec/qsvenc: expose only supported options

2022-05-25 Thread Dmitry Rogozhkin
ffmpeg | branch: master | Dmitry Rogozhkin 
 | Thu May 19 12:54:16 2022 -0700| 
[f8a07c4d4abbd3974e074bc54bc22eeaa0f46051] | committer: Haihao Xiang

libavcodec/qsvenc: expose only supported options

The vp9, hevc, avc and mpeg2 QSV encoders inherit a common list
of options (QSV_COMMON_OPTS), but a bunch of those options are not
actually supported by the current qsv code. The only codec which
supports everything is avc, followed by hevc, while vp9 and
mpeg2 fall significantly behind. This creates difficulties
for users of the qsv encoders. This patch fixes the option
lists of the encoders, leaving only those options which are actually
supported.

Signed-off-by: Dmitry Rogozhkin 
Signed-off-by: Haihao Xiang 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f8a07c4d4abbd3974e074bc54bc22eeaa0f46051
---

 libavcodec/qsvenc.h   | 52 ++-
 libavcodec/qsvenc_h264.c  | 12 +++
 libavcodec/qsvenc_hevc.c  |  9 
 libavcodec/qsvenc_mpeg2.c |  1 +
 4 files changed, 60 insertions(+), 14 deletions(-)

diff --git a/libavcodec/qsvenc.h b/libavcodec/qsvenc.h
index cb84723dfa..33bbc2a1d6 100644
--- a/libavcodec/qsvenc.h
+++ b/libavcodec/qsvenc.h
@@ -89,22 +89,46 @@
 { "slow",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_3  },
INT_MIN, INT_MAX, VE, "preset" },   
 \
 { "slower",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_2  },
INT_MIN, INT_MAX, VE, "preset" },   
 \
 { "veryslow",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
MFX_TARGETUSAGE_BEST_QUALITY  }, INT_MIN, INT_MAX, VE, "preset" },  
  \
-{ "rdo","Enable rate distortion optimization",OFFSET(qsv.rdo), 
   AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  1, VE },
 \
+{ "forced_idr", "Forcing I frames as IDR frames", 
OFFSET(qsv.forced_idr), AV_OPT_TYPE_BOOL,{ .i64 = 0  },  0,  1, VE 
}, \
+{ "low_power", "enable low power mode(experimental: many limitations by mfx 
version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = 
-1}, -1, 1, VE},
+
+#define QSV_OPTION_RDO \
+{ "rdo","Enable rate distortion optimization",OFFSET(qsv.rdo), 
   AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  1, VE },
+
+#define QSV_OPTION_MAX_FRAME_SIZE \
 { "max_frame_size", "Maximum encoded frame size in bytes",
OFFSET(qsv.max_frame_size), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,INT_MAX, VE 
}, \
 { "max_frame_size_i", "Maximum encoded I frame size in 
bytes",OFFSET(qsv.max_frame_size_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  
INT_MAX, VE }, \
-{ "max_frame_size_p", "Maximum encoded P frame size in 
bytes",OFFSET(qsv.max_frame_size_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  
INT_MAX, VE }, \
-{ "max_slice_size", "Maximum encoded slice size in bytes",
OFFSET(qsv.max_slice_size), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,INT_MAX, VE 
}, \
-{ "bitrate_limit",  "Toggle bitrate limitations", 
OFFSET(qsv.bitrate_limit),  AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  1, VE 
}, \
-{ "mbbrc",  "MB level bitrate control",   
OFFSET(qsv.mbbrc),  AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  1, VE 
}, \
-{ "extbrc", "Extended bitrate control",   
OFFSET(qsv.extbrc), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  1, VE 
}, \
-{ "adaptive_i", "Adaptive I-frame placement", 
OFFSET(qsv.adaptive_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  1, VE 
}, \
-{ "adaptive_b", "Adaptive B-frame placement", 
OFFSET(qsv.adaptive_b), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  1, VE 
}, \
-{ "p_strategy", "Enable P-pyramid: 0-default 1-simple 2-pyramid(bf need to 
be set to 0).",OFFSET(qsv.p_strategy), AV_OPT_TYPE_INT,{ .i64 = 0}, 0,  
  2, VE }, \
-{ "b_strategy", "Strategy to choose between I/P/B-frames", 
OFFSET(qsv.b_strategy),AV_OPT_TYPE_INT, { .i64 = -1 }, -1,  1, VE 
}, \
-{ "forced_idr", "Forcing I frames as IDR frames", 
OFFSET(qsv.forced_idr), AV_OPT_TYPE_BOOL,{ .i64 = 0  },  0,  1, VE 
}, \
-{ "low_power", "enable low power mode(experimental: many limitations by mfx 
version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = 
-1}, -1, 1, VE},\
-{ "dblk_idc", "This option disable deblocking. It has value in range 0~2.",   
OFFSET(qsv.dblk_idc),   AV_OPT_TYPE_INT,{ .i64 = 0 },   0,  2,  VE},\
-{ "low_delay_brc",   "Allow to strictly obey avg frame size", 
OFFSET(qsv.low_delay_brc),  

[FFmpeg-cvslog] checkasm: improve hevc_sao test

2022-05-25 Thread J . Dekker
ffmpeg | branch: master | J. Dekker  | Tue May 17 13:48:23 
2022 +0200| [cc679054c715acda9438e566b8de3a9eba421ac3] | committer: J. Dekker

checkasm: improve hevc_sao test

The HEVC decoder can call these functions with smaller widths than the
functions themselves are designed to operate on, so we should only check
the relevant output.

Signed-off-by: J. Dekker 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cc679054c715acda9438e566b8de3a9eba421ac3
---

 tests/checkasm/hevc_sao.c | 51 ---
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/tests/checkasm/hevc_sao.c b/tests/checkasm/hevc_sao.c
index 6b750758e2..4a23010243 100644
--- a/tests/checkasm/hevc_sao.c
+++ b/tests/checkasm/hevc_sao.c
@@ -78,20 +78,26 @@ static void check_sao_band(HEVCDSPContext h, int bit_depth)
 
 for (i = 0; i <= 4; i++) {
 int block_size = sao_size[i];
+int prev_size = i > 0 ? sao_size[i - 1] : 0;
 ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, 
ptrdiff_t dst_stride, ptrdiff_t src_stride,
   int16_t *sao_offset_val, int sao_left_class, int 
width, int height);
 
-randomize_buffers(src0, src1, BUF_SIZE);
-randomize_buffers2(offset_val, OFFSET_LENGTH);
-memset(dst0, 0, BUF_SIZE);
-memset(dst1, 0, BUF_SIZE);
-
-if (check_func(h.sao_band_filter[i], "hevc_sao_band_%dx%d_%d", 
block_size, block_size, bit_depth)) {
-call_ref(dst0, src0, stride, stride, offset_val, left_class, 
block_size, block_size);
-call_new(dst1, src1, stride, stride, offset_val, left_class, 
block_size, block_size);
-if (memcmp(dst0, dst1, BUF_SIZE))
-fail();
+if (check_func(h.sao_band_filter[i], "hevc_sao_band_%d_%d", 
block_size, bit_depth)) {
+
+for (int w = prev_size + 4; w <= block_size; w += 4) {
+randomize_buffers(src0, src1, BUF_SIZE);
+randomize_buffers2(offset_val, OFFSET_LENGTH);
+memset(dst0, 0, BUF_SIZE);
+memset(dst1, 0, BUF_SIZE);
+
+call_ref(dst0, src0, stride, stride, offset_val, left_class, 
w, block_size);
+call_new(dst1, src1, stride, stride, offset_val, left_class, 
w, block_size);
+for (int j = 0; j < block_size; j++) {
+if (memcmp(dst0 + j*stride, dst1 + j*stride, 
w*SIZEOF_PIXEL))
+fail();
+}
+}
 bench_new(dst1, src1, stride, stride, offset_val, left_class, 
block_size, block_size);
 }
 }
@@ -109,21 +115,26 @@ static void check_sao_edge(HEVCDSPContext h, int 
bit_depth)
 
 for (i = 0; i <= 4; i++) {
 int block_size = sao_size[i];
+int prev_size = i > 0 ? sao_size[i - 1] : 0;
 ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
 int offset = (AV_INPUT_BUFFER_PADDING_SIZE + 
PIXEL_STRIDE)*SIZEOF_PIXEL;
 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, 
ptrdiff_t stride_dst,
   int16_t *sao_offset_val, int eo, int width, int 
height);
 
-randomize_buffers(src0, src1, BUF_SIZE);
-randomize_buffers2(offset_val, OFFSET_LENGTH);
-memset(dst0, 0, BUF_SIZE);
-memset(dst1, 0, BUF_SIZE);
-
-if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%dx%d_%d", 
block_size, block_size, bit_depth)) {
-call_ref(dst0, src0 + offset, stride, offset_val, eo, block_size, 
block_size);
-call_new(dst1, src1 + offset, stride, offset_val, eo, block_size, 
block_size);
-if (memcmp(dst0, dst1, BUF_SIZE))
-fail();
+for (int w = prev_size + 4; w <= block_size; w += 4) {
+randomize_buffers(src0, src1, BUF_SIZE);
+randomize_buffers2(offset_val, OFFSET_LENGTH);
+memset(dst0, 0, BUF_SIZE);
+memset(dst1, 0, BUF_SIZE);
+
+if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%d_%d", 
block_size, bit_depth)) {
+call_ref(dst0, src0 + offset, stride, offset_val, eo, w, 
block_size);
+call_new(dst1, src1 + offset, stride, offset_val, eo, w, 
block_size);
+for (int j = 0; j < block_size; j++) {
+if (memcmp(dst0 + j*stride, dst1 + j*stride, 
w*SIZEOF_PIXEL))
+fail();
+}
+}
 bench_new(dst1, src1 + offset, stride, offset_val, eo, block_size, 
block_size);
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/aarch64: add hevc sao edge 16x16

2022-05-25 Thread J . Dekker
ffmpeg | branch: master | J. Dekker  | Thu Apr 28 14:57:33 
2022 +0200| [92f67e40170994dcb7a96ae362d95308f6744294] | committer: J. Dekker

lavc/aarch64: add hevc sao edge 16x16

bench on AWS Graviton:

hevc_sao_edge_16x16_8_c: 1857.0
hevc_sao_edge_16x16_8_neon: 211.0
hevc_sao_edge_32x32_8_c: 7802.2
hevc_sao_edge_32x32_8_neon: 808.2
hevc_sao_edge_48x48_8_c: 16764.2
hevc_sao_edge_48x48_8_neon: 1796.5
hevc_sao_edge_64x64_8_c: 32647.5
hevc_sao_edge_64x64_8_neon: 3118.5

Signed-off-by: J. Dekker 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=92f67e40170994dcb7a96ae362d95308f6744294
---

 libavcodec/aarch64/hevcdsp_init_aarch64.c |  8 +++-
 libavcodec/aarch64/hevcdsp_sao_neon.S | 66 +++
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index c8963e6104..df521bb083 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -57,8 +57,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, 
uint8_t *_src,
   ptrdiff_t stride_dst, ptrdiff_t stride_src,
   int16_t *sao_offset_val, int sao_left_class,
   int width, int height);
-
-
+void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, 
ptrdiff_t stride_dst,
+  int16_t *sao_offset_val, int eo, int 
width, int height);
 
 av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
 {
@@ -80,6 +80,10 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, 
const int bit_depth)
 c->sao_band_filter[2]  =
 c->sao_band_filter[3]  =
 c->sao_band_filter[4]  = ff_hevc_sao_band_filter_8x8_8_neon;
+c->sao_edge_filter[1]  =
+c->sao_edge_filter[2]  =
+c->sao_edge_filter[3]  =
+c->sao_edge_filter[4]  = ff_hevc_sao_edge_filter_16x16_8_neon;
 }
 if (bit_depth == 10) {
 c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S 
b/libavcodec/aarch64/hevcdsp_sao_neon.S
index e07e0cea2d..0315c479df 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -74,3 +74,69 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
 bne 1b
 ret
 endfunc
+
+// ASSUMES STRIDE_SRC = 192
+.Lsao_edge_pos:
+.word 1 // horizontal
+.word 192 // vertical
+.word 192 + 1 // 45 degree
+.word 192 - 1 // 135 degree
+
+// ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff 
stride_dst,
+//  int16 *sao_offset_val, int eo, int 
width, int height)
+function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
+adr x7, .Lsao_edge_pos
+ld1 {v3.8h}, [x3]  // load sao_offset_val
+add w5,  w5,  #0xF
+bic w5,  w5,  #0xF
+ldr w4, [x7, w4, uxtw #2]  // stride_src
+mov v3.h[7], v3.h[0]   // reorder to [1,2,0,3,4]
+mov v3.h[0], v3.h[1]
+mov v3.h[1], v3.h[2]
+mov v3.h[2], v3.h[7]
+// split 16bit values into two tables
+uzp2v1.16b, v3.16b, v3.16b // sao_offset_val -> upper
+uzp1v0.16b, v3.16b, v3.16b // sao_offset_val -> lower
+moviv2.16b, #2
+mov x15, #192
+// strides between end of line and next src/dst
+sub x15, x15, x5   // stride_src - width
+sub x16, x2, x5// stride_dst - width
+mov x11, x1// copy base src
+1:  // new line
+mov x14, x5// copy width
+sub x12, x11, x4   // src_a (prev) = src - 
sao_edge_pos
+add x13, x11, x4   // src_b (next) = src + 
sao_edge_pos
+2:  // process 16 bytes
+ld1 {v3.16b}, [x11], #16   // load src
+ld1 {v4.16b}, [x12], #16   // load src_a (prev)
+ld1 {v5.16b}, [x13], #16   // load src_b (next)
+cmhiv16.16b, v4.16b, v3.16b// (prev > cur)
+cmhiv17.16b, v3.16b, v4.16b// (cur > prev)
+cmhiv18.16b, v5.16b, v3.16b// (next > cur)
+cmhiv19.16b, v3.16b, v5.16b// (cur > next)
+sub v20.16b, v16.16b, v17.16b  // diff0 = CMP(cur, prev) = 
(cur > prev) - (cur < prev)
+sub v21.16b, v18.16b, v19.16b  // diff1 = CMP(cur, next) = 
(cur > next) - (cur < next)
+add v20.16b, v20.16b, v21.16b  // diff = 

[FFmpeg-cvslog] lavc/aarch64: add hevc sao edge 8x8

2022-05-25 Thread J . Dekker
ffmpeg | branch: master | J. Dekker  | Thu Apr 28 14:57:43 
2022 +0200| [2e832be322eb456e44b1e928904fa470a0b00a67] | committer: J. Dekker

lavc/aarch64: add hevc sao edge 8x8

bench on AWS Graviton:

hevc_sao_edge_8x8_8_c: 516.0
hevc_sao_edge_8x8_8_neon: 81.0

Signed-off-by: J. Dekker 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2e832be322eb456e44b1e928904fa470a0b00a67
---

 libavcodec/aarch64/hevcdsp_init_aarch64.c |  3 ++
 libavcodec/aarch64/hevcdsp_sao_neon.S | 51 +++
 2 files changed, 54 insertions(+)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index df521bb083..2002530266 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -59,6 +59,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, 
uint8_t *_src,
   int width, int height);
 void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, 
ptrdiff_t stride_dst,
   int16_t *sao_offset_val, int eo, int 
width, int height);
+void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t 
stride_dst,
+  int16_t *sao_offset_val, int eo, int 
width, int height);
 
 av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
 {
@@ -80,6 +82,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, 
const int bit_depth)
 c->sao_band_filter[2]  =
 c->sao_band_filter[3]  =
 c->sao_band_filter[4]  = ff_hevc_sao_band_filter_8x8_8_neon;
+c->sao_edge_filter[0]  = ff_hevc_sao_edge_filter_8x8_8_neon;
 c->sao_edge_filter[1]  =
 c->sao_edge_filter[2]  =
 c->sao_edge_filter[3]  =
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S 
b/libavcodec/aarch64/hevcdsp_sao_neon.S
index 0315c479df..efd8112af4 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -140,3 +140,54 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
 // no lines to filter
 ret
 endfunc
+
+// ff_hevc_sao_edge_filter_8x8_8_neon(char *dst, char *src, ptrdiff stride_dst,
+//int16 *sao_offset_val, int eo, int 
width, int height)
+function ff_hevc_sao_edge_filter_8x8_8_neon, export=1
+adr x7, .Lsao_edge_pos
+ldr w4, [x7, w4, uxtw #2]
+ld1 {v3.8h}, [x3]
+mov v3.h[7], v3.h[0]
+mov v3.h[0], v3.h[1]
+mov v3.h[1], v3.h[2]
+mov v3.h[2], v3.h[7]
+uzp2v1.16b, v3.16b, v3.16b
+uzp1v0.16b, v3.16b, v3.16b
+moviv2.16b, #2
+add x16, x0, x2
+lsl x2,  x2, #1
+mov x15, #192
+mov x8,  x1
+sub x9,  x1, x4
+add x10, x1, x4
+lsr w17, w6, #1
+1:  ld1 {v3.d}[0], [ x8], x15
+ld1 {v4.d}[0], [ x9], x15
+ld1 {v5.d}[0], [x10], x15
+ld1 {v3.d}[1], [ x8], x15
+ld1 {v4.d}[1], [ x9], x15
+ld1 {v5.d}[1], [x10], x15
+cmhiv16.16b, v4.16b, v3.16b
+cmhiv17.16b, v3.16b, v4.16b
+cmhiv18.16b, v5.16b, v3.16b
+cmhiv19.16b, v3.16b, v5.16b
+sub v20.16b, v16.16b, v17.16b
+sub v21.16b, v18.16b, v19.16b
+add v20.16b, v20.16b, v21.16b
+add v20.16b, v20.16b, v2.16b
+tbl v16.16b, {v0.16b}, v20.16b
+tbl v17.16b, {v1.16b}, v20.16b
+uxtlv20.8h, v3.8b
+uxtl2   v21.8h, v3.16b
+zip1v18.16b, v16.16b, v17.16b
+zip2v19.16b, v16.16b, v17.16b
+sqadd   v20.8h, v18.8h, v20.8h
+sqadd   v21.8h, v19.8h, v21.8h
+sqxtun  v6.8b, v20.8h
+sqxtun  v7.8b, v21.8h
+st1 {v6.8b}, [ x0], x2
+st1 {v7.8b}, [x16], x2
+subsx17, x17, #1
+b.ne1b
+ret
+endfunc

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/aarch64: fix hevc sao band filter

2022-05-25 Thread J . Dekker
ffmpeg | branch: master | J. Dekker  | Tue Apr 26 09:29:54 
2022 +0200| [d957ee34a6ec998ea00d6d07ac687c5d7a9792a2] | committer: J. Dekker

lavc/aarch64: fix hevc sao band filter

The SAO band filter can be called with widths that are not multiples of
8, so we round the width up to the nearest multiple of 8 to account for this.

Signed-off-by: J. Dekker 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d957ee34a6ec998ea00d6d07ac687c5d7a9792a2
---

 libavcodec/aarch64/hevcdsp_init_aarch64.c | 10 +-
 libavcodec/aarch64/hevcdsp_sao_neon.S |  8 ++--
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 1e40be740c..c8963e6104 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -75,11 +75,11 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, 
const int bit_depth)
 c->idct_dc[1]  = ff_hevc_idct_8x8_dc_8_neon;
 c->idct_dc[2]  = ff_hevc_idct_16x16_dc_8_neon;
 c->idct_dc[3]  = ff_hevc_idct_32x32_dc_8_neon;
-// This function is disabled, as it doesn't handle widths that aren't
-// an even multiple of 8 correctly. fate-hevc doesn't exercise that
-// for the current size, but if enabled for bigger sizes, the cases
-// of non-multiple of 8 seem to arise.
-//c->sao_band_filter[0]  = ff_hevc_sao_band_filter_8x8_8_neon;
+c->sao_band_filter[0]  =
+c->sao_band_filter[1]  =
+c->sao_band_filter[2]  =
+c->sao_band_filter[3]  =
+c->sao_band_filter[4]  = ff_hevc_sao_band_filter_8x8_8_neon;
 }
 if (bit_depth == 10) {
 c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S 
b/libavcodec/aarch64/hevcdsp_sao_neon.S
index d523bf584d..e07e0cea2d 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -41,7 +41,11 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
 and w10, w10, #0x1F
 strhw9, [sp, x10, lsl #1]
 bne 0b
+add w6,  w6,  #7
+bic w6,  w6,  #7
 ld1 {v16.16b-v19.16b}, [sp], #64
+sub x2,  x2,  x6
+sub x3,  x3,  x6
 moviv20.8h,   #1
 1:  mov w8,  w6// beginning of line
 2:  // Simple layout for accessing 16bit values
@@ -52,7 +56,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
 // |xDE#xAD|xCA#xFE|xBE#xEF|xFE#xED|
 // +--->
 //i-0 i-1 i-2 i-3
-ld1 {v2.8b}, [x1]  // dst[x] = 
av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
+ld1 {v2.8b}, [x1], #8  // dst[x] = 
av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
 uxtlv0.8h,  v2.8b  // load src[x]
 ushrv2.8h,  v0.8h, #3  // >> BIT_DEPTH - 3
 shl v1.8h,  v2.8h, #1  // low (x2, accessing short)
@@ -61,7 +65,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
 tbx v2.16b, {v16.16b-v19.16b}, v1.16b // table
 add v1.8h,  v0.8h, v2.8h   // src[x] + table
 sqxtun  v4.8b,  v1.8h  // clip + narrow
-st1 {v4.8b}, [x0]  // store
+st1 {v4.8b}, [x0], #8  // store
 subsw8, w8,  #8// done 8 pixels
 bne 2b
 subsw7, w7,  #1// finished line, prep. new

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".