[FFmpeg-devel] [PATCH 02/11] avutil: add AVX-512 flags

2017-11-09 Thread James Darnley
---
 libavutil/cpu.c   | 6 +-
 libavutil/cpu.h   | 1 +
 libavutil/tests/cpu.c | 1 +
 libavutil/x86/cpu.h   | 2 ++
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index c8401b8258..6548cc3042 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -80,7 +80,8 @@ void av_force_cpu_flags(int arg){
 AV_CPU_FLAG_XOP  |
 AV_CPU_FLAG_FMA3 |
 AV_CPU_FLAG_FMA4 |
-AV_CPU_FLAG_AVX2 ))
+AV_CPU_FLAG_AVX2 |
+AV_CPU_FLAG_AVX512   ))
 && !(arg & AV_CPU_FLAG_MMX)) {
 av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n");
 arg |= AV_CPU_FLAG_MMX;
@@ -126,6 +127,7 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
 #define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | AV_CPU_FLAG_BMI1)
 #define CPUFLAG_AESNI(AV_CPU_FLAG_AESNI| CPUFLAG_SSE42)
+#define CPUFLAG_AVX512   (AV_CPU_FLAG_AVX512   | CPUFLAG_AVX2)
 static const AVOption cpuflags_opts[] = {
 { "flags"   , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, 
INT64_MAX, .unit = "flags" },
 #if   ARCH_PPC
@@ -154,6 +156,7 @@ int av_parse_cpu_flags(const char *s)
 { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT
 },.unit = "flags" },
 { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV
 },.unit = "flags" },
 { "aesni"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AESNI   
 },.unit = "flags" },
+{ "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX512  
 },.unit = "flags" },
 #elif ARCH_ARM
 { "armv5te",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE 
 },.unit = "flags" },
 { "armv6",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6   
 },.unit = "flags" },
@@ -216,6 +219,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
 { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
AV_CPU_FLAG_3DNOWEXT },.unit = "flags" },
 { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV
 },.unit = "flags" },
 { "aesni",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI   
 },.unit = "flags" },
+{ "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512  
 },.unit = "flags" },
 
 #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
 #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9e5d40affe..91523f3f5a 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -55,6 +55,7 @@
 #define AV_CPU_FLAG_FMA30x1 ///< Haswell FMA3 functions
 #define AV_CPU_FLAG_BMI10x2 ///< Bit Manipulation Instruction Set 1
 #define AV_CPU_FLAG_BMI20x4 ///< Bit Manipulation Instruction Set 2
+#define AV_CPU_FLAG_AVX512 0x10 ///< AVX-512 functions
 
 #define AV_CPU_FLAG_ALTIVEC  0x0001 ///< standard
 #define AV_CPU_FLAG_VSX  0x0002 ///< ISA 2.06
diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
index f02a54cbbb..ce45b715a0 100644
--- a/libavutil/tests/cpu.c
+++ b/libavutil/tests/cpu.c
@@ -73,6 +73,7 @@ static const struct {
 { AV_CPU_FLAG_BMI1,  "bmi1"   },
 { AV_CPU_FLAG_BMI2,  "bmi2"   },
 { AV_CPU_FLAG_AESNI, "aesni"  },
+{ AV_CPU_FLAG_AVX512,"avx512" },
 #endif
 { 0 }
 };
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index 309b8e746c..7f4e5d08bb 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h
@@ -50,6 +50,7 @@
 #define X86_FMA4(flags) CPUEXT(flags, FMA4)
 #define X86_AVX2(flags) CPUEXT(flags, AVX2)
 #define X86_AESNI(flags)CPUEXT(flags, AESNI)
+#define X86_AVX512(flags)   CPUEXT(flags, AVX512)
 
 #define EXTERNAL_AMD3DNOW(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
 #define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, 
AMD3DNOWEXT)
@@ -79,6 +80,7 @@
 #define EXTERNAL_AVX2_FAST(flags)   CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, 
AVX2, AVX)
 #define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, 
AVX2, AVX)
 #define EXTERNAL_AESNI(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
+#define EXTERNAL_AVX512(flags)  CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
 
 #define INLINE_AMD3DNOW(flags)  CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
 #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 02/11] avutil: add AVX-512 flags

2017-11-09 Thread James Almer
On 11/9/2017 8:58 AM, James Darnley wrote:
> ---
>  libavutil/cpu.c   | 6 +-
>  libavutil/cpu.h   | 1 +
>  libavutil/tests/cpu.c | 1 +
>  libavutil/x86/cpu.h   | 2 ++
>  4 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> index c8401b8258..6548cc3042 100644
> --- a/libavutil/cpu.c
> +++ b/libavutil/cpu.c
> @@ -80,7 +80,8 @@ void av_force_cpu_flags(int arg){
>  AV_CPU_FLAG_XOP  |
>  AV_CPU_FLAG_FMA3 |
>  AV_CPU_FLAG_FMA4 |
> -AV_CPU_FLAG_AVX2 ))
> +AV_CPU_FLAG_AVX2 |
> +AV_CPU_FLAG_AVX512   ))
>  && !(arg & AV_CPU_FLAG_MMX)) {
>  av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n");
>  arg |= AV_CPU_FLAG_MMX;
> @@ -126,6 +127,7 @@ int av_parse_cpu_flags(const char *s)
>  #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
>  #define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | AV_CPU_FLAG_BMI1)
>  #define CPUFLAG_AESNI(AV_CPU_FLAG_AESNI| CPUFLAG_SSE42)
> +#define CPUFLAG_AVX512   (AV_CPU_FLAG_AVX512   | CPUFLAG_AVX2)
>  static const AVOption cpuflags_opts[] = {
>  { "flags"   , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, 
> INT64_MAX, .unit = "flags" },
>  #if   ARCH_PPC
> @@ -154,6 +156,7 @@ int av_parse_cpu_flags(const char *s)
>  { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT  
>},.unit = "flags" },
>  { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV  
>},.unit = "flags" },
>  { "aesni"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AESNI 
>},.unit = "flags" },
> +{ "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX512
>},.unit = "flags" },
>  #elif ARCH_ARM
>  { "armv5te",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
> AV_CPU_FLAG_ARMV5TE  },.unit = "flags" },
>  { "armv6",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6 
>},.unit = "flags" },
> @@ -216,6 +219,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
>  { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
> AV_CPU_FLAG_3DNOWEXT },.unit = "flags" },
>  { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV  
>},.unit = "flags" },
>  { "aesni",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI 
>},.unit = "flags" },
> +{ "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
> AV_CPU_FLAG_AVX512   },.unit = "flags" },
>  
>  #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
>  #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE
> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> index 9e5d40affe..91523f3f5a 100644
> --- a/libavutil/cpu.h
> +++ b/libavutil/cpu.h
> @@ -55,6 +55,7 @@
>  #define AV_CPU_FLAG_FMA30x1 ///< Haswell FMA3 functions
>  #define AV_CPU_FLAG_BMI10x2 ///< Bit Manipulation Instruction 
> Set 1
>  #define AV_CPU_FLAG_BMI20x4 ///< Bit Manipulation Instruction 
> Set 2
> +#define AV_CPU_FLAG_AVX512 0x10 ///< AVX-512 functions

Nit: "AVX-512 functions: requires OS support even if YMM/ZMM registers
aren't used"

That's more in line with the AVX and AVX2 lines.

>  
>  #define AV_CPU_FLAG_ALTIVEC  0x0001 ///< standard
>  #define AV_CPU_FLAG_VSX  0x0002 ///< ISA 2.06
> diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
> index f02a54cbbb..ce45b715a0 100644
> --- a/libavutil/tests/cpu.c
> +++ b/libavutil/tests/cpu.c
> @@ -73,6 +73,7 @@ static const struct {
>  { AV_CPU_FLAG_BMI1,  "bmi1"   },
>  { AV_CPU_FLAG_BMI2,  "bmi2"   },
>  { AV_CPU_FLAG_AESNI, "aesni"  },
> +{ AV_CPU_FLAG_AVX512,"avx512" },
>  #endif
>  { 0 }
>  };
> diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
> index 309b8e746c..7f4e5d08bb 100644
> --- a/libavutil/x86/cpu.h
> +++ b/libavutil/x86/cpu.h
> @@ -50,6 +50,7 @@
>  #define X86_FMA4(flags) CPUEXT(flags, FMA4)
>  #define X86_AVX2(flags) CPUEXT(flags, AVX2)
>  #define X86_AESNI(flags)CPUEXT(flags, AESNI)
> +#define X86_AVX512(flags)   CPUEXT(flags, AVX512)
>  
>  #define EXTERNAL_AMD3DNOW(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
>  #define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, 
> AMD3DNOWEXT)
> @@ -79,6 +80,7 @@
>  #define EXTERNAL_AVX2_FAST(flags)   CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, 
> AVX2, AVX)
>  #define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, 
> AVX2, AVX)
>  #define EXTERNAL_AESNI(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
> +#define EXTERNAL_AVX512(flags)  CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
>  
>  #define INLINE_AMD3DNOW(flags)  CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
>  #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE, 
> AMD3D