Re: [FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
On Wed, Feb 23, 2022 at 9:57 AM wrote: > > From: Wu Jianhua > > Signed-off-by: Wu Jianhua > --- > configure | 13 +++--- > libavutil/cpu.c | 1 + > libavutil/cpu.h | 1 + > libavutil/x86/cpu.c | 8 -- > libavutil/x86/cpu.h | 1 + > libavutil/x86/x86inc.asm | 53 --- > tests/checkasm/checkasm.c | 35 +- > 7 files changed, 63 insertions(+), 49 deletions(-) This patch LGTM (didn't look at the actual asm code yet though). ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
Ping. > From: Wu, Jianhua > Sent: Wednesday, March 2, 2022 1:34 PM > To: ffmpeg-devel@ffmpeg.org > Subject: RE: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag > > Ping. > > From: Wu, Jianhua > > Sent: Wednesday, February 23, 2022 4:58 PM > > To: ffmpeg-devel@ffmpeg.org > > Cc: Wu, Jianhua > > Subject: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag > > > > From: Wu Jianhua > > > > Signed-off-by: Wu Jianhua > > --- > > configure | 13 +++--- > > libavutil/cpu.c | 1 + > > libavutil/cpu.h | 1 + > > libavutil/x86/cpu.c | 8 -- > > libavutil/x86/cpu.h | 1 + > > libavutil/x86/x86inc.asm | 53 > > --- > > tests/checkasm/checkasm.c | 35 +- > > 7 files changed, 63 insertions(+), 49 deletions(-) > > > > diff --git a/configure b/configure > > index 1535dc3c5b..d88c2ae979 100755 > > --- a/configure > > +++ b/configure > > @@ -444,6 +444,7 @@ Optimization options (experts only): > >--disable-fma4 disable FMA4 optimizations > >--disable-avx2 disable AVX2 optimizations > >--disable-avx512 disable AVX-512 optimizations > > + --disable-avx512icl disable AVX-512ICL optimizations > >--disable-aesni disable AESNI optimizations > >--disable-armv5tedisable armv5te optimizations > >--disable-armv6 disable armv6 optimizations > > @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD=" > > avx > > avx2 > > avx512 > > +avx512icl > > fma3 > > fma4 > > mmx > > @@ -2666,6 +2668,7 @@ fma3_deps="avx" > > fma4_deps="avx" > > avx2_deps="avx" > > avx512_deps="avx2" > > +avx512icl_deps="avx512" > > > > mmx_external_deps="x86asm" > > mmx_inline_deps="inline_asm x86" > > @@ -6128,10 +6131,11 @@ EOF > > elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;; > > esac > > > > -enabled avx512 && check_x86asm avx512_external "vmovdqa32 > > [eax]{k1}{z}, zmm0" > > -enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, > > ymm0, 0" > > -enabled xop&& check_x86asm xop_external"vpmacsdd xmm0, > > xmm1, xmm2, xmm3" > > -enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, > > ymm1, ymm2, ymm3" > > 
+enabled avx512&& check_x86asm avx512_external"vmovdqa32 > > [eax]{k1}{z}, zmm0" > > +enabled avx512icl && check_x86asm avx512icl_external > > + "vpdpwssds > > zmm31{k1}{z}, zmm29, zmm28" > > +enabled avx2 && check_x86asm avx2_external "vextracti128 > > xmm0, ymm0, 0" > > +enabled xop && check_x86asm xop_external "vpmacsdd > > xmm0, > > xmm1, xmm2, xmm3" > > +enabled fma4 && check_x86asm fma4_external "vfmaddps > ymm0, > > ymm1, ymm2, ymm3" > > check_x86asm cpunop "CPU amdnop" > > fi > > > > @@ -7471,6 +7475,7 @@ if enabled x86; then > > echo "AVX enabled ${avx-no}" > > echo "AVX2 enabled ${avx2-no}" > > echo "AVX-512 enabled ${avx512-no}" > > +echo "AVX-512ICL enabled${avx512icl-no}" > > echo "XOP enabled ${xop-no}" > > echo "FMA3 enabled ${fma3-no}" > > echo "FMA4 enabled ${fma4-no}" > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c index > > 1368502245..833c220192 > > 100644 > > --- a/libavutil/cpu.c > > +++ b/libavutil/cpu.c > > @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char > *s) > > { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_CMOV },.unit = "flags" }, > > { "aesni",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_AESNI},.unit = "flags" }, > > { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_AVX512 },.unit = "flags" }, > > +{ "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_AVX512ICL }, .unit = "flags" }, > > { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > > AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" }, > > > > #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX diff > -- git > > a/libavutil/cpu.h b/libavutil/cpu.h index ce9bf14bf7..9711e574c5 > > 100644 > > --- a/libavutil/cpu.h > > +++ b/libavutil/cpu.h > > @@ -54,6 +54,7 @@ > > #define AV_CPU_FLAG_BMI10x2 ///< Bit Manipulation > Instruction > > Set 1 > > #define AV_CPU_FLAG_BMI20x4 ///< Bit Manipulation > Instruction > > Set 2 > > #define AV_CPU_FLAG_AVX512 0x10 ///< AVX-512 functions: > > requires OS support even if 
YMM/ZMM registers aren't used > > +#define AV_CPU_FLAG_AVX512ICL 0x20 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ > > #define AV_CPU_FLAG_SLOW_GATHER 0x200 ///< CPU has slow gathers. > > > > #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard > > diff --git a/libavutil/x86/
Re: [FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
Ping. > -Original Message- > From: Wu, Jianhua > Sent: Wednesday, February 23, 2022 4:58 PM > To: ffmpeg-devel@ffmpeg.org > Cc: Wu, Jianhua > Subject: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag > > From: Wu Jianhua > > Signed-off-by: Wu Jianhua > --- > configure | 13 +++--- > libavutil/cpu.c | 1 + > libavutil/cpu.h | 1 + > libavutil/x86/cpu.c | 8 -- > libavutil/x86/cpu.h | 1 + > libavutil/x86/x86inc.asm | 53 --- > tests/checkasm/checkasm.c | 35 +- > 7 files changed, 63 insertions(+), 49 deletions(-) > > diff --git a/configure b/configure > index 1535dc3c5b..d88c2ae979 100755 > --- a/configure > +++ b/configure > @@ -444,6 +444,7 @@ Optimization options (experts only): >--disable-fma4 disable FMA4 optimizations >--disable-avx2 disable AVX2 optimizations >--disable-avx512 disable AVX-512 optimizations > + --disable-avx512icl disable AVX-512ICL optimizations >--disable-aesni disable AESNI optimizations >--disable-armv5tedisable armv5te optimizations >--disable-armv6 disable armv6 optimizations > @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD=" > avx > avx2 > avx512 > +avx512icl > fma3 > fma4 > mmx > @@ -2666,6 +2668,7 @@ fma3_deps="avx" > fma4_deps="avx" > avx2_deps="avx" > avx512_deps="avx2" > +avx512icl_deps="avx512" > > mmx_external_deps="x86asm" > mmx_inline_deps="inline_asm x86" > @@ -6128,10 +6131,11 @@ EOF > elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;; > esac > > -enabled avx512 && check_x86asm avx512_external "vmovdqa32 > [eax]{k1}{z}, zmm0" > -enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, > ymm0, 0" > -enabled xop&& check_x86asm xop_external"vpmacsdd xmm0, > xmm1, xmm2, xmm3" > -enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, > ymm1, ymm2, ymm3" > +enabled avx512&& check_x86asm avx512_external"vmovdqa32 > [eax]{k1}{z}, zmm0" > +enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds > zmm31{k1}{z}, zmm29, zmm28" > +enabled avx2 && check_x86asm avx2_external "vextracti128 > xmm0, ymm0, 0" > +enabled xop && 
check_x86asm xop_external "vpmacsdd xmm0, > xmm1, xmm2, xmm3" > +enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, > ymm1, ymm2, ymm3" > check_x86asm cpunop "CPU amdnop" > fi > > @@ -7471,6 +7475,7 @@ if enabled x86; then > echo "AVX enabled ${avx-no}" > echo "AVX2 enabled ${avx2-no}" > echo "AVX-512 enabled ${avx512-no}" > +echo "AVX-512ICL enabled${avx512icl-no}" > echo "XOP enabled ${xop-no}" > echo "FMA3 enabled ${fma3-no}" > echo "FMA4 enabled ${fma4-no}" > diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 1368502245..833c220192 > 100644 > --- a/libavutil/cpu.c > +++ b/libavutil/cpu.c > @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) > { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > AV_CPU_FLAG_CMOV },.unit = "flags" }, > { "aesni",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > AV_CPU_FLAG_AESNI},.unit = "flags" }, > { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > AV_CPU_FLAG_AVX512 },.unit = "flags" }, > +{ "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > AV_CPU_FLAG_AVX512ICL }, .unit = "flags" }, > { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" }, > > #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX diff -- > git a/libavutil/cpu.h b/libavutil/cpu.h index ce9bf14bf7..9711e574c5 100644 > --- a/libavutil/cpu.h > +++ b/libavutil/cpu.h > @@ -54,6 +54,7 @@ > #define AV_CPU_FLAG_BMI10x2 ///< Bit Manipulation Instruction > Set 1 > #define AV_CPU_FLAG_BMI20x4 ///< Bit Manipulation Instruction > Set 2 > #define AV_CPU_FLAG_AVX512 0x10 ///< AVX-512 functions: > requires OS support even if YMM/ZMM registers aren't used > +#define AV_CPU_FLAG_AVX512ICL 0x20 ///< > +F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/V > AES/VPCLMULQD > +Q > #define AV_CPU_FLAG_SLOW_GATHER 0x200 ///< CPU has slow > gathers. 
> > #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard > diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 7b13fcae91..d6cd4fab9c 100644 > --- a/libavutil/x86/cpu.c > +++ b/libavutil/x86/cpu.c > @@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void) > rval |= AV_CPU_FLAG_AVX2; > #if HAVE_AVX512 /* F, CD, BW, DQ, VL */ > if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */ > -if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd003)
[FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
From: Wu Jianhua Signed-off-by: Wu Jianhua --- configure | 13 +++--- libavutil/cpu.c | 1 + libavutil/cpu.h | 1 + libavutil/x86/cpu.c | 8 -- libavutil/x86/cpu.h | 1 + libavutil/x86/x86inc.asm | 53 --- tests/checkasm/checkasm.c | 35 +- 7 files changed, 63 insertions(+), 49 deletions(-) diff --git a/configure b/configure index 1535dc3c5b..d88c2ae979 100755 --- a/configure +++ b/configure @@ -444,6 +444,7 @@ Optimization options (experts only): --disable-fma4 disable FMA4 optimizations --disable-avx2 disable AVX2 optimizations --disable-avx512 disable AVX-512 optimizations + --disable-avx512icl disable AVX-512ICL optimizations --disable-aesni disable AESNI optimizations --disable-armv5tedisable armv5te optimizations --disable-armv6 disable armv6 optimizations @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD=" avx avx2 avx512 +avx512icl fma3 fma4 mmx @@ -2666,6 +2668,7 @@ fma3_deps="avx" fma4_deps="avx" avx2_deps="avx" avx512_deps="avx2" +avx512icl_deps="avx512" mmx_external_deps="x86asm" mmx_inline_deps="inline_asm x86" @@ -6128,10 +6131,11 @@ EOF elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;; esac -enabled avx512 && check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0" -enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, ymm0, 0" -enabled xop&& check_x86asm xop_external"vpmacsdd xmm0, xmm1, xmm2, xmm3" -enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, ymm1, ymm2, ymm3" +enabled avx512&& check_x86asm avx512_external"vmovdqa32 [eax]{k1}{z}, zmm0" +enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds zmm31{k1}{z}, zmm29, zmm28" +enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, ymm0, 0" +enabled xop && check_x86asm xop_external "vpmacsdd xmm0, xmm1, xmm2, xmm3" +enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, ymm1, ymm2, ymm3" check_x86asm cpunop "CPU amdnop" fi @@ -7471,6 +7475,7 @@ if enabled x86; then echo "AVX enabled ${avx-no}" echo "AVX2 enabled ${avx2-no}" echo "AVX-512 enabled ${avx512-no}" 
+echo "AVX-512ICL enabled${avx512icl-no}" echo "XOP enabled ${xop-no}" echo "FMA3 enabled ${fma3-no}" echo "FMA4 enabled ${fma4-no}" diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 1368502245..833c220192 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV },.unit = "flags" }, { "aesni",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI },.unit = "flags" }, { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512 },.unit = "flags" }, +{ "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512ICL }, .unit = "flags" }, { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" }, #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX diff --git a/libavutil/cpu.h b/libavutil/cpu.h index ce9bf14bf7..9711e574c5 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -54,6 +54,7 @@ #define AV_CPU_FLAG_BMI10x2 ///< Bit Manipulation Instruction Set 1 #define AV_CPU_FLAG_BMI20x4 ///< Bit Manipulation Instruction Set 2 #define AV_CPU_FLAG_AVX512 0x10 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used +#define AV_CPU_FLAG_AVX512ICL 0x20 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ #define AV_CPU_FLAG_SLOW_GATHER 0x200 ///< CPU has slow gathers. 
#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 7b13fcae91..d6cd4fab9c 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void) rval |= AV_CPU_FLAG_AVX2; #if HAVE_AVX512 /* F, CD, BW, DQ, VL */ if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */ -if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd003) == 0xd003) +if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd003) == 0xd003) { rval |= AV_CPU_FLAG_AVX512; - +#if HAVE_AVX512ICL +if ((ebx & 0xd020) == 0xd020 && (ecx & 0x5f42) == 0x5f42) +rval |= AV_CPU_FLAG_AVX512ICL; +#endif /* HAVE_AVX512ICL */ +} } #endif /* HAVE_AVX512 */ #endif /* HAVE_AVX2 */ diff --git a/libavutil/x86