Ping. > From: Wu, Jianhua > Sent: Wednesday, March 2, 2022 1:34 PM > To: ffmpeg-devel@ffmpeg.org > Subject: RE: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag > > Ping. > > From: Wu, Jianhua <jianhua...@intel.com> > > Sent: Wednesday, February 23, 2022 4:58 PM > > To: ffmpeg-devel@ffmpeg.org > > Cc: Wu, Jianhua <jianhua...@intel.com> > > Subject: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag > > > > From: Wu Jianhua <jianhua...@intel.com> > > > > Signed-off-by: Wu Jianhua <jianhua...@intel.com> > > --- > > configure | 13 +++++++--- > > libavutil/cpu.c | 1 + > > libavutil/cpu.h | 1 + > > libavutil/x86/cpu.c | 8 ++++-- > > libavutil/x86/cpu.h | 1 + > > libavutil/x86/x86inc.asm | 53 > > ++++++++++++++++++++------------------- > > tests/checkasm/checkasm.c | 35 +++++++++++++------------- > > 7 files changed, 63 insertions(+), 49 deletions(-) > > > > diff --git a/configure b/configure > > index 1535dc3c5b..d88c2ae979 100755 > > --- a/configure > > +++ b/configure > > @@ -444,6 +444,7 @@ Optimization options (experts only): > > --disable-fma4 disable FMA4 optimizations > > --disable-avx2 disable AVX2 optimizations > > --disable-avx512 disable AVX-512 optimizations > > + --disable-avx512icl disable AVX-512ICL optimizations > > --disable-aesni disable AESNI optimizations > > --disable-armv5te disable armv5te optimizations > > --disable-armv6 disable armv6 optimizations > > @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD=" > > avx > > avx2 > > avx512 > > + avx512icl > > fma3 > > fma4 > > mmx > > @@ -2666,6 +2668,7 @@ fma3_deps="avx" > > fma4_deps="avx" > > avx2_deps="avx" > > avx512_deps="avx2" > > +avx512icl_deps="avx512" > > > > mmx_external_deps="x86asm" > > mmx_inline_deps="inline_asm x86" > > @@ -6128,10 +6131,11 @@ EOF > > elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;; > > esac > > > > - enabled avx512 && check_x86asm avx512_external "vmovdqa32 > > [eax]{k1}{z}, zmm0" > > - enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, > > ymm0, 0" > > - 
enabled xop && check_x86asm xop_external "vpmacsdd xmm0, > > xmm1, xmm2, xmm3" > > - enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, > > ymm1, ymm2, ymm3" > > + enabled avx512 && check_x86asm avx512_external "vmovdqa32 > > [eax]{k1}{z}, zmm0" > > + enabled avx512icl && check_x86asm avx512icl_external > > + "vpdpwssds > > zmm31{k1}{z}, zmm29, zmm28" > > + enabled avx2 && check_x86asm avx2_external "vextracti128 > > xmm0, ymm0, 0" > > + enabled xop && check_x86asm xop_external "vpmacsdd > > xmm0, > > xmm1, xmm2, xmm3" > > + enabled fma4 && check_x86asm fma4_external "vfmaddps > ymm0, > > ymm1, ymm2, ymm3" > > check_x86asm cpunop "CPU amdnop" > > fi > > > > @@ -7471,6 +7475,7 @@ if enabled x86; then > > echo "AVX enabled ${avx-no}" > > echo "AVX2 enabled ${avx2-no}" > > echo "AVX-512 enabled ${avx512-no}" > > + echo "AVX-512ICL enabled ${avx512icl-no}" > > echo "XOP enabled ${xop-no}" > > echo "FMA3 enabled ${fma3-no}" > > echo "FMA4 enabled ${fma4-no}" > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c index > > 1368502245..833c220192 > > 100644 > > --- a/libavutil/cpu.c > > +++ b/libavutil/cpu.c > > @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char > *s) > > { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_CMOV }, .unit = "flags" }, > > { "aesni", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_AESNI }, .unit = "flags" }, > > { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_AVX512 }, .unit = "flags" }, > > + { "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_AVX512ICL }, .unit = "flags" }, > > { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" }, > > > > #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX diff > -- git > > a/libavutil/cpu.h b/libavutil/cpu.h index ce9bf14bf7..9711e574c5 > > 100644 > > --- a/libavutil/cpu.h > > +++ b/libavutil/cpu.h > > @@ -54,6 +54,7 @@ > > #define AV_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation > 
Instruction > > Set 1 > > #define AV_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation > Instruction > > Set 2 > > #define AV_CPU_FLAG_AVX512 0x100000 ///< AVX-512 functions: > > requires OS support even if YMM/ZMM registers aren't used > > +#define AV_CPU_FLAG_AVX512ICL 0x200000 ///< > > > +F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/V > > AES/VPCLMULQD > > +Q > > #define AV_CPU_FLAG_SLOW_GATHER 0x2000000 ///< CPU has slow > gathers. > > > > #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard > > diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index > > 7b13fcae91..d6cd4fab9c 100644 > > --- a/libavutil/x86/cpu.c > > +++ b/libavutil/x86/cpu.c > > @@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void) > > rval |= AV_CPU_FLAG_AVX2; #if HAVE_AVX512 /* F, CD, BW, > > DQ, VL */ > > if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */ > > - if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == > > 0xd0030000) > > + if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == > > + 0xd0030000) { > > rval |= AV_CPU_FLAG_AVX512; > > - > > +#if HAVE_AVX512ICL > > + if ((ebx & 0xd0200000) == 0xd0200000 && (ecx & 0x5f42) == > 0x5f42) > > + rval |= AV_CPU_FLAG_AVX512ICL; #endif /* > > +HAVE_AVX512ICL */ > > + } > > } > > #endif /* HAVE_AVX512 */ > > #endif /* HAVE_AVX2 */ > > diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index > > 937c697fa0..40a1eef0ab 100644 > > --- a/libavutil/x86/cpu.h > > +++ b/libavutil/x86/cpu.h > > @@ -80,6 +80,7 @@ > > #define EXTERNAL_AVX2_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, > > _EXTERNAL, AVX2, AVX) > > #define EXTERNAL_AESNI(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, > > AESNI) > > #define EXTERNAL_AVX512(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, > > AVX512) > > +#define EXTERNAL_AVX512ICL(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, > > AVX512ICL) > > > > #define INLINE_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _INLINE, > > AMD3DNOW) > > #define INLINE_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, > > AMD3DNOWEXT) > > diff --git 
a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index > > 01c35e3a4b..251ee797de 100644 > > --- a/libavutil/x86/x86inc.asm > > +++ b/libavutil/x86/x86inc.asm > > @@ -817,32 +817,33 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, > > jnle, jg, jge, jng, jnge, ja, jae, > > > > ; cpuflags > > > > -%assign cpuflags_mmx (1<<0) > > -%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx > > -%assign cpuflags_3dnow (1<<2) | cpuflags_mmx > > -%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow > > -%assign cpuflags_sse (1<<4) | cpuflags_mmx2 > > -%assign cpuflags_sse2 (1<<5) | cpuflags_sse > > -%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 > > -%assign cpuflags_lzcnt (1<<7) | cpuflags_sse2 > > -%assign cpuflags_sse3 (1<<8) | cpuflags_sse2 > > -%assign cpuflags_ssse3 (1<<9) | cpuflags_sse3 > > -%assign cpuflags_sse4 (1<<10)| cpuflags_ssse3 > > -%assign cpuflags_sse42 (1<<11)| cpuflags_sse4 > > -%assign cpuflags_aesni (1<<12)| cpuflags_sse42 > > -%assign cpuflags_avx (1<<13)| cpuflags_sse42 > > -%assign cpuflags_xop (1<<14)| cpuflags_avx > > -%assign cpuflags_fma4 (1<<15)| cpuflags_avx > > -%assign cpuflags_fma3 (1<<16)| cpuflags_avx > > -%assign cpuflags_bmi1 (1<<17)| cpuflags_avx|cpuflags_lzcnt > > -%assign cpuflags_bmi2 (1<<18)| cpuflags_bmi1 > > -%assign cpuflags_avx2 (1<<19)| cpuflags_fma3|cpuflags_bmi2 > > -%assign cpuflags_avx512 (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL > > - > > -%assign cpuflags_cache32 (1<<21) > > -%assign cpuflags_cache64 (1<<22) > > -%assign cpuflags_aligned (1<<23) ; not a cpu feature, but a function > variant > > -%assign cpuflags_atom (1<<24) > > +%assign cpuflags_mmx (1<<0) > > +%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx > > +%assign cpuflags_3dnow (1<<2) | cpuflags_mmx > > +%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow > > +%assign cpuflags_sse (1<<4) | cpuflags_mmx2 > > +%assign cpuflags_sse2 (1<<5) | cpuflags_sse > > +%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 > > +%assign cpuflags_lzcnt (1<<7) | cpuflags_sse2 > > +%assign 
cpuflags_sse3 (1<<8) | cpuflags_sse2 > > +%assign cpuflags_ssse3 (1<<9) | cpuflags_sse3 > > +%assign cpuflags_sse4 (1<<10)| cpuflags_ssse3 > > +%assign cpuflags_sse42 (1<<11)| cpuflags_sse4 > > +%assign cpuflags_aesni (1<<12)| cpuflags_sse42 > > +%assign cpuflags_avx (1<<13)| cpuflags_sse42 > > +%assign cpuflags_xop (1<<14)| cpuflags_avx > > +%assign cpuflags_fma4 (1<<15)| cpuflags_avx > > +%assign cpuflags_fma3 (1<<16)| cpuflags_avx > > +%assign cpuflags_bmi1 (1<<17)| cpuflags_avx|cpuflags_lzcnt > > +%assign cpuflags_bmi2 (1<<18)| cpuflags_bmi1 > > +%assign cpuflags_avx2 (1<<19)| cpuflags_fma3|cpuflags_bmi2 > > +%assign cpuflags_avx512 (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL > > +%assign cpuflags_avx512icl (1<<25)| cpuflags_avx512 > > + > > +%assign cpuflags_cache32 (1<<21) > > +%assign cpuflags_cache64 (1<<22) > > +%assign cpuflags_aligned (1<<23) ; not a cpu feature, but a function > variant > > +%assign cpuflags_atom (1<<24) > > > > ; Returns a boolean value expressing whether or not the specified > > cpuflag is enabled. 
> > %define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ > > x)) - > > 1) >> 31) & 1) > > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > > index f74125e810..e77b4ec20f 100644 > > --- a/tests/checkasm/checkasm.c > > +++ b/tests/checkasm/checkasm.c > > @@ -220,23 +220,24 @@ static const struct { > > { "MMI", "mmi", AV_CPU_FLAG_MMI }, > > { "MSA", "msa", AV_CPU_FLAG_MSA }, > > #elif ARCH_X86 > > - { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, > > - { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, > > - { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, > > - { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, > > - { "SSE", "sse", AV_CPU_FLAG_SSE }, > > - { "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, > > - { "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, > > - { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, > > - { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, > > - { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, > > - { "AES-NI", "aesni", AV_CPU_FLAG_AESNI }, > > - { "AVX", "avx", AV_CPU_FLAG_AVX }, > > - { "XOP", "xop", AV_CPU_FLAG_XOP }, > > - { "FMA3", "fma3", AV_CPU_FLAG_FMA3 }, > > - { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, > > - { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, > > - { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, > > + { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, > > + { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, > > + { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, > > + { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, > > + { "SSE", "sse", AV_CPU_FLAG_SSE }, > > + { "SSE2", "sse2", > AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, > > + { "SSE3", "sse3", > AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, > > + { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, > > + { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, > > + { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, > > + { "AES-NI", "aesni", AV_CPU_FLAG_AESNI }, > > + { "AVX", "avx", AV_CPU_FLAG_AVX }, > > + { "XOP", "xop", AV_CPU_FLAG_XOP }, > > + { "FMA3", "fma3", AV_CPU_FLAG_FMA3 
}, > > + { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, > > + { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, > > + { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, > > + { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL }, > > #elif ARCH_LOONGARCH > > { "LSX", "lsx", AV_CPU_FLAG_LSX }, > > { "LASX", "lasx", AV_CPU_FLAG_LASX }, > > -- > > 2.17.1
Hi there, These patches were sent two weeks ago but have not received any response so far. Could the maintainers of CPU flags and native HEVC decoding help review this patchset? Thanks, Jianhua _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".