Re: [FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag

2022-03-10 Thread Henrik Gramner
On Wed, Feb 23, 2022 at 9:57 AM Wu Jianhua wrote:
>
> From: Wu Jianhua 
>
> Signed-off-by: Wu Jianhua 
> ---
>  configure | 13 +++---
>  libavutil/cpu.c   |  1 +
>  libavutil/cpu.h   |  1 +
>  libavutil/x86/cpu.c   |  8 --
>  libavutil/x86/cpu.h   |  1 +
>  libavutil/x86/x86inc.asm  | 53 ---
>  tests/checkasm/checkasm.c | 35 +-
>  7 files changed, 63 insertions(+), 49 deletions(-)

This patch LGTM (didn't look at the actual asm code yet though).
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag

2022-03-08 Thread Wu, Jianhua
Ping.
> From: Wu, Jianhua
> Sent: Wednesday, March 2, 2022 1:34 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: RE: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
> 
> Ping.
> > From: Wu, Jianhua 
> > Sent: Wednesday, February 23, 2022 4:58 PM
> > To: ffmpeg-devel@ffmpeg.org
> > Cc: Wu, Jianhua 
> > Subject: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
> >
> > From: Wu Jianhua 
> >
> > Signed-off-by: Wu Jianhua 
> > ---
> >  configure | 13 +++---
> >  libavutil/cpu.c   |  1 +
> >  libavutil/cpu.h   |  1 +
> >  libavutil/x86/cpu.c   |  8 --
> >  libavutil/x86/cpu.h   |  1 +
> >  libavutil/x86/x86inc.asm  | 53
> > ---
> >  tests/checkasm/checkasm.c | 35 +-
> >  7 files changed, 63 insertions(+), 49 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 1535dc3c5b..d88c2ae979 100755
> > --- a/configure
> > +++ b/configure
> > @@ -444,6 +444,7 @@ Optimization options (experts only):
> >--disable-fma4   disable FMA4 optimizations
> >--disable-avx2   disable AVX2 optimizations
> >--disable-avx512 disable AVX-512 optimizations
> > +  --disable-avx512icl  disable AVX-512ICL optimizations
> >--disable-aesni  disable AESNI optimizations
> >--disable-armv5tedisable armv5te optimizations
> >--disable-armv6  disable armv6 optimizations
> > @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD="
> >  avx
> >  avx2
> >  avx512
> > +avx512icl
> >  fma3
> >  fma4
> >  mmx
> > @@ -2666,6 +2668,7 @@ fma3_deps="avx"
> >  fma4_deps="avx"
> >  avx2_deps="avx"
> >  avx512_deps="avx2"
> > +avx512icl_deps="avx512"
> >
> >  mmx_external_deps="x86asm"
> >  mmx_inline_deps="inline_asm x86"
> > @@ -6128,10 +6131,11 @@ EOF
> >  elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
> >  esac
> >
> > -enabled avx512 && check_x86asm avx512_external "vmovdqa32
> > [eax]{k1}{z}, zmm0"
> > -enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0,
> > ymm0, 0"
> > -enabled xop&& check_x86asm xop_external"vpmacsdd xmm0,
> > xmm1, xmm2, xmm3"
> > -enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0,
> > ymm1, ymm2, ymm3"
> > +enabled avx512&& check_x86asm avx512_external"vmovdqa32
> > [eax]{k1}{z}, zmm0"
> > +enabled avx512icl && check_x86asm avx512icl_external
> > + "vpdpwssds
> > zmm31{k1}{z}, zmm29, zmm28"
> > +enabled avx2  && check_x86asm avx2_external  "vextracti128
> > xmm0, ymm0, 0"
> > +enabled xop   && check_x86asm xop_external   "vpmacsdd 
> > xmm0,
> > xmm1, xmm2, xmm3"
> > +enabled fma4  && check_x86asm fma4_external  "vfmaddps
> ymm0,
> > ymm1, ymm2, ymm3"
> >  check_x86asm cpunop  "CPU amdnop"
> >  fi
> >
> > @@ -7471,6 +7475,7 @@ if enabled x86; then
> >  echo "AVX enabled   ${avx-no}"
> >  echo "AVX2 enabled  ${avx2-no}"
> >  echo "AVX-512 enabled   ${avx512-no}"
> > +echo "AVX-512ICL enabled${avx512icl-no}"
> >  echo "XOP enabled   ${xop-no}"
> >  echo "FMA3 enabled  ${fma3-no}"
> >  echo "FMA4 enabled  ${fma4-no}"
> > diff --git a/libavutil/cpu.c b/libavutil/cpu.c index
> > 1368502245..833c220192
> > 100644
> > --- a/libavutil/cpu.c
> > +++ b/libavutil/cpu.c
> > @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char
> *s)
> >  { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_CMOV },.unit = "flags" },
> >  { "aesni",NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AESNI},.unit = "flags" },
> >  { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AVX512   },.unit = "flags" },
> > +{ "avx512icl",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AVX512ICL   }, .unit = "flags" },
> >  { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" },
> >
> >  #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX diff
> -- git
> > a/libavutil/cpu.h b/libavutil/cpu.h index ce9bf14bf7..9711e574c5
> > 100644
> > --- a/libavutil/cpu.h
> > +++ b/libavutil/cpu.h
> > @@ -54,6 +54,7 @@
> >  #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation Instruction Set 1
> >  #define AV_CPU_FLAG_BMI2        0x40000 ///< Bit Manipulation Instruction Set 2
> >  #define AV_CPU_FLAG_AVX512     0x100000 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used
> > +#define AV_CPU_FLAG_AVX512ICL  0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
> >  #define AV_CPU_FLAG_SLOW_GATHER  0x2000000 ///< CPU has slow gathers.
> >
> >  #define AV_CPU_FLAG_ALTIVEC  0x0001 ///< standard
> > diff --git a/libavutil/x86/

Re: [FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag

2022-03-01 Thread Wu, Jianhua
Ping.
> -Original Message-
> From: Wu, Jianhua 
> Sent: Wednesday, February 23, 2022 4:58 PM
> To: ffmpeg-devel@ffmpeg.org
> Cc: Wu, Jianhua 
> Subject: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
> 
> From: Wu Jianhua 
> 
> Signed-off-by: Wu Jianhua 
> ---
>  configure | 13 +++---
>  libavutil/cpu.c   |  1 +
>  libavutil/cpu.h   |  1 +
>  libavutil/x86/cpu.c   |  8 --
>  libavutil/x86/cpu.h   |  1 +
>  libavutil/x86/x86inc.asm  | 53 ---
>  tests/checkasm/checkasm.c | 35 +-
>  7 files changed, 63 insertions(+), 49 deletions(-)
> 
> diff --git a/configure b/configure
> index 1535dc3c5b..d88c2ae979 100755
> --- a/configure
> +++ b/configure
> @@ -444,6 +444,7 @@ Optimization options (experts only):
>--disable-fma4   disable FMA4 optimizations
>--disable-avx2   disable AVX2 optimizations
>--disable-avx512 disable AVX-512 optimizations
> +  --disable-avx512icl  disable AVX-512ICL optimizations
>--disable-aesni  disable AESNI optimizations
>--disable-armv5tedisable armv5te optimizations
>--disable-armv6  disable armv6 optimizations
> @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD="
>  avx
>  avx2
>  avx512
> +avx512icl
>  fma3
>  fma4
>  mmx
> @@ -2666,6 +2668,7 @@ fma3_deps="avx"
>  fma4_deps="avx"
>  avx2_deps="avx"
>  avx512_deps="avx2"
> +avx512icl_deps="avx512"
> 
>  mmx_external_deps="x86asm"
>  mmx_inline_deps="inline_asm x86"
> @@ -6128,10 +6131,11 @@ EOF
>  elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
>  esac
> 
> -enabled avx512 && check_x86asm avx512_external "vmovdqa32
> [eax]{k1}{z}, zmm0"
> -enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0,
> ymm0, 0"
> -enabled xop&& check_x86asm xop_external"vpmacsdd xmm0,
> xmm1, xmm2, xmm3"
> -enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0,
> ymm1, ymm2, ymm3"
> +enabled avx512&& check_x86asm avx512_external"vmovdqa32
> [eax]{k1}{z}, zmm0"
> +enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds
> zmm31{k1}{z}, zmm29, zmm28"
> +enabled avx2  && check_x86asm avx2_external  "vextracti128
> xmm0, ymm0, 0"
> +enabled xop   && check_x86asm xop_external   "vpmacsdd xmm0,
> xmm1, xmm2, xmm3"
> +enabled fma4  && check_x86asm fma4_external  "vfmaddps ymm0,
> ymm1, ymm2, ymm3"
>  check_x86asm cpunop  "CPU amdnop"
>  fi
> 
> @@ -7471,6 +7475,7 @@ if enabled x86; then
>  echo "AVX enabled   ${avx-no}"
>  echo "AVX2 enabled  ${avx2-no}"
>  echo "AVX-512 enabled   ${avx512-no}"
> +echo "AVX-512ICL enabled${avx512icl-no}"
>  echo "XOP enabled   ${xop-no}"
>  echo "FMA3 enabled  ${fma3-no}"
>  echo "FMA4 enabled  ${fma4-no}"
> diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 1368502245..833c220192
> 100644
> --- a/libavutil/cpu.c
> +++ b/libavutil/cpu.c
> @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
>  { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_CMOV },.unit = "flags" },
>  { "aesni",NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_AESNI},.unit = "flags" },
>  { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_AVX512   },.unit = "flags" },
> +{ "avx512icl",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_AVX512ICL   }, .unit = "flags" },
>  { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" },
> 
>  #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX diff --
> git a/libavutil/cpu.h b/libavutil/cpu.h index ce9bf14bf7..9711e574c5 100644
> --- a/libavutil/cpu.h
> +++ b/libavutil/cpu.h
> @@ -54,6 +54,7 @@
>  #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation Instruction Set 1
>  #define AV_CPU_FLAG_BMI2        0x40000 ///< Bit Manipulation Instruction Set 2
>  #define AV_CPU_FLAG_AVX512     0x100000 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used
> +#define AV_CPU_FLAG_AVX512ICL  0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
>  #define AV_CPU_FLAG_SLOW_GATHER  0x2000000 ///< CPU has slow gathers.
> 
>  #define AV_CPU_FLAG_ALTIVEC  0x0001 ///< standard
> diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index
> 7b13fcae91..d6cd4fab9c 100644
> --- a/libavutil/x86/cpu.c
> +++ b/libavutil/x86/cpu.c
> @@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void)
>  rval |= AV_CPU_FLAG_AVX2;
>  #if HAVE_AVX512 /* F, CD, BW, DQ, VL */
>  if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
> -            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)

[FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag

2022-02-23 Thread jianhua . wu-at-intel . com
From: Wu Jianhua 

Signed-off-by: Wu Jianhua 
---
 configure | 13 +++---
 libavutil/cpu.c   |  1 +
 libavutil/cpu.h   |  1 +
 libavutil/x86/cpu.c   |  8 --
 libavutil/x86/cpu.h   |  1 +
 libavutil/x86/x86inc.asm  | 53 ---
 tests/checkasm/checkasm.c | 35 +-
 7 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/configure b/configure
index 1535dc3c5b..d88c2ae979 100755
--- a/configure
+++ b/configure
@@ -444,6 +444,7 @@ Optimization options (experts only):
   --disable-fma4   disable FMA4 optimizations
   --disable-avx2   disable AVX2 optimizations
   --disable-avx512 disable AVX-512 optimizations
+  --disable-avx512icl  disable AVX-512ICL optimizations
   --disable-aesni  disable AESNI optimizations
   --disable-armv5tedisable armv5te optimizations
   --disable-armv6  disable armv6 optimizations
@@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD="
 avx
 avx2
 avx512
+avx512icl
 fma3
 fma4
 mmx
@@ -2666,6 +2668,7 @@ fma3_deps="avx"
 fma4_deps="avx"
 avx2_deps="avx"
 avx512_deps="avx2"
+avx512icl_deps="avx512"
 
 mmx_external_deps="x86asm"
 mmx_inline_deps="inline_asm x86"
@@ -6128,10 +6131,11 @@ EOF
 elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
 esac
 
-    enabled avx512 && check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0"
-    enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0, ymm0, 0"
-    enabled xop    && check_x86asm xop_external    "vpmacsdd xmm0, xmm1, xmm2, xmm3"
-    enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0, ymm1, ymm2, ymm3"
+    enabled avx512    && check_x86asm avx512_external    "vmovdqa32 [eax]{k1}{z}, zmm0"
+    enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds zmm31{k1}{z}, zmm29, zmm28"
+    enabled avx2      && check_x86asm avx2_external      "vextracti128 xmm0, ymm0, 0"
+    enabled xop       && check_x86asm xop_external       "vpmacsdd xmm0, xmm1, xmm2, xmm3"
+    enabled fma4      && check_x86asm fma4_external      "vfmaddps ymm0, ymm1, ymm2, ymm3"
 check_x86asm cpunop  "CPU amdnop"
 fi
 
@@ -7471,6 +7475,7 @@ if enabled x86; then
 echo "AVX enabled   ${avx-no}"
 echo "AVX2 enabled  ${avx2-no}"
 echo "AVX-512 enabled   ${avx512-no}"
+echo "AVX-512ICL enabled${avx512icl-no}"
 echo "XOP enabled   ${xop-no}"
 echo "FMA3 enabled  ${fma3-no}"
 echo "FMA4 enabled  ${fma4-no}"
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 1368502245..833c220192 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV     },    .unit = "flags" },
         { "aesni",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI    },    .unit = "flags" },
         { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512   },    .unit = "flags" },
+        { "avx512icl",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512ICL   }, .unit = "flags" },
         { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" },
 
 #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index ce9bf14bf7..9711e574c5 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -54,6 +54,7 @@
 #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation Instruction Set 1
 #define AV_CPU_FLAG_BMI2        0x40000 ///< Bit Manipulation Instruction Set 2
 #define AV_CPU_FLAG_AVX512     0x100000 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used
+#define AV_CPU_FLAG_AVX512ICL  0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
 #define AV_CPU_FLAG_SLOW_GATHER  0x2000000 ///< CPU has slow gathers.
 
 #define AV_CPU_FLAG_ALTIVEC  0x0001 ///< standard
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 7b13fcae91..d6cd4fab9c 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void)
             rval |= AV_CPU_FLAG_AVX2;
 #if HAVE_AVX512 /* F, CD, BW, DQ, VL */
         if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
-            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
+            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000) {
                 rval |= AV_CPU_FLAG_AVX512;
-
+#if HAVE_AVX512ICL
+                if ((ebx & 0xd0200000) == 0xd0200000 && (ecx & 0x5f42) == 0x5f42)
+                    rval |= AV_CPU_FLAG_AVX512ICL;
+#endif /* HAVE_AVX512ICL */
+            }
         }
 #endif /* HAVE_AVX512 */
 #endif /* HAVE_AVX2 */
diff --git a/libavutil/x86