I recently wanted to use function multiversioning for the BMI2-specific instructions PDEP/PEXT, and noticed that BMI2 was not supported as a multiversioning target. So I wrote this patch to add it, and also added AES, F16C and BMI1 for completeness.
Happy new year `Allan
commit 062c09d45d22302ffbd4f86d88e16a1a0d49cd80 Author: Allan Sandfeld Jensen <allan.jen...@digia.com> Date: Fri Dec 26 21:14:01 2014 +0100 AES, F16C BMI and BMI2 multiversion support diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ff8a5e6..83f16a5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2014-12-29 Allan Sandfeld Jensen <sandf...@kde.org> + + * config/i386/i386.c (get_builtin_code_for_version): Add + support for AES, BMI, BMI2 and F16C multiversion functions. + 2014-12-27 H.J. Lu <hongjiu...@intel.com> PR target/64409 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d693fdb..a1b74dc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -34261,15 +34261,22 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) P_PROC_SSE4_A, P_SSE4_1, P_SSE4_2, - P_PROC_SSE4_2, P_POPCNT, + P_PROC_SSE4_2, + P_AES, + P_PROC_AES, P_AVX, P_PROC_AVX, + P_F16C, + P_PROC_F16C, + P_BMI, + P_PROC_BMI, P_FMA4, P_XOP, P_PROC_XOP, P_FMA, P_PROC_FMA, + P_BMI2, P_AVX2, P_PROC_AVX2, P_AVX512F, @@ -34295,12 +34302,16 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) {"sse4a", P_SSE4_A}, {"ssse3", P_SSSE3}, {"sse4.1", P_SSE4_1}, - {"sse4.2", P_SSE4_2}, {"popcnt", P_POPCNT}, + {"sse4.2", P_SSE4_2}, + {"aes", P_AES}, {"avx", P_AVX}, + {"f16c", P_F16C}, + {"bmi", P_BMI}, {"fma4", P_FMA4}, {"xop", P_XOP}, {"fma", P_FMA}, + {"bmi2", P_BMI2}, {"avx2", P_AVX2}, {"avx512f", P_AVX512F} }; @@ -34350,21 +34361,25 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) priority = P_PROC_SSSE3; break; case PROCESSOR_NEHALEM: - if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES) + if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES) { arg_str = "westmere"; - else + priority = P_PROC_AES; + } else { /* We translate "arch=corei7" and "arch=nehalem" to "corei7" so that it will be mapped to M_INTEL_COREI7 as cpu type to cover all M_INTEL_COREI7_XXXs. 
*/ arg_str = "corei7"; - priority = P_PROC_SSE4_2; + priority = P_PROC_SSE4_2; + } break; case PROCESSOR_SANDYBRIDGE: - if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C) + if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C) { arg_str = "ivybridge"; - else + priority = P_PROC_F16C; + } else { arg_str = "sandybridge"; - priority = P_PROC_AVX; + priority = P_PROC_AVX; + } break; case PROCESSOR_HASWELL: if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX) @@ -34395,7 +34410,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) break; case PROCESSOR_BTVER2: arg_str = "btver2"; - priority = P_PROC_AVX; + priority = P_PROC_BMI; break; case PROCESSOR_BDVER1: arg_str = "bdver1"; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ef6ddcc..5b11622 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2014-12-29 Allan Sandfeld Jensen <sandf...@kde.org> + + * gcc.target/i386/funcspec-5.c: Test new multiversion targets. + * g++.dg/ext/mv17.C: Test BMI/BMI2 multiversion dispatcher. + 2014-12-28 H.J. Lu <hongjiu...@intel.com> * gcc.target/i386/pr57003.c: Skip on x32. diff --git a/gcc/testsuite/g++.dg/ext/mv17.C b/gcc/testsuite/g++.dg/ext/mv17.C new file mode 100644 index 0000000..311f217 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/mv17.C @@ -0,0 +1,91 @@ +/* Test case to check if Multiversioning works for BMI and BMI2. */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2" } */ + +#include <assert.h> + +// Check BMI feature selection works +int foo () __attribute__((target("default"))); +int foo () __attribute__((target("bmi"))); +int foo () __attribute__((target("bmi2"))); + +// Check specialized versions for archs with BMI is chosen over generic BMI versions. 
+int bar () __attribute__((target("default"))); +int bar () __attribute__((target("bmi"))); +int bar () __attribute__((target("bmi2"))); +int bar () __attribute__((target("arch=btver2"))); +int bar () __attribute__((target("arch=haswell"))); + +int main () +{ + int val = foo (); + + if (__builtin_cpu_supports ("bmi2")) + assert (val == 2); + else if (__builtin_cpu_supports ("bmi")) + assert (val == 1); + else + assert (val == 0); + + val = bar (); + + if (__builtin_cpu_is ("btver2")) + assert (val == 5); + else if (__builtin_cpu_is ("haswell")) + assert (val == 6); + else if (__builtin_cpu_supports ("bmi2")) + assert (val == 2); + else if (__builtin_cpu_supports ("bmi")) + assert (val == 1); + else + assert (val == 0); + + return 0; +} + +int __attribute__ ((target("default"))) +foo () +{ + return 0; +} + +int __attribute__ ((target("bmi"))) +foo () +{ + return 1; +} +int __attribute__ ((target("bmi2"))) +foo () +{ + return 2; +} + +int __attribute__ ((target("default"))) +bar () +{ + return 0; +} + +int __attribute__ ((target("bmi"))) +bar () +{ + return 1; +} +int __attribute__ ((target("bmi2"))) +bar () +{ + return 2; +} + +int __attribute__ ((target("arch=btver2"))) +bar () +{ + return 5; +} + +int __attribute__ ((target("arch=haswell"))) +bar () +{ + return 6; +} + diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c b/gcc/testsuite/gcc.target/i386/funcspec-5.c index 269e610..1e37433 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c @@ -25,6 +25,10 @@ extern void test_tbm (void) __attribute__((__target__("tbm"))); extern void test_avx (void) __attribute__((__target__("avx"))); extern void test_avx2 (void) __attribute__((__target__("avx2"))); extern void test_avx512 (void) __attribute__((__target__("avx512"))); +extern void test_aes (void) __attribute__((__target__("aes"))); +extern void test_f16c (void) __attribute__((__target__("f16c"))); +extern void test_bmi (void) __attribute__((__target__("bmi"))); 
+extern void test_bmi2 (void) __attribute__((__target__("bmi2"))); extern void test_no_abm (void) __attribute__((__target__("no-abm"))); extern void test_no_aes (void) __attribute__((__target__("no-aes"))); @@ -48,6 +52,10 @@ extern void test_no_tbm (void) __attribute__((__target__("no-tbm"))); extern void test_no_avx (void) __attribute__((__target__("no-avx"))); extern void test_no_avx2 (void) __attribute__((__target__("no-avx2"))); extern void test_no_avx512 (void) __attribute__((__target__("no-avx512"))); +extern void test_no_aes (void) __attribute__((__target__("no-aes"))); +extern void test_no_f16c (void) __attribute__((__target__("no-f16c"))); +extern void test_no_bmi (void) __attribute__((__target__("no-bmi"))); +extern void test_no_bmi2 (void) __attribute__((__target__("no-bmi2"))); extern void test_arch_i386 (void) __attribute__((__target__("arch=i386"))); extern void test_arch_i486 (void) __attribute__((__target__("arch=i486"))); diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index efd08c4..fbd835e 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,10 @@ +2014-12-29 Allan Sandfeld Jensen <sandf...@kde.org> + + * config/i386/cpuinfo.c (enum processor_features): Add FEATURE_AES, + FEATURE_F16C, FEATURE_BMI and FEATURE_BMI2. + (get_available_features): Detect FEATURE_AES, FEATURE_F16C, + FEATURE_BMI and FEATURE_BMI2. + 2014-12-19 Matthew Fortune <matthew.fort...@imgtec.com> * config.host: Support mipsisa32r6 and mipsisa64r6. 
diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c index 8ca385c..813bfb0 100644 --- a/libgcc/config/i386/cpuinfo.c +++ b/libgcc/config/i386/cpuinfo.c @@ -97,7 +97,11 @@ enum processor_features FEATURE_FMA4, FEATURE_XOP, FEATURE_FMA, - FEATURE_AVX512F + FEATURE_AVX512F, + FEATURE_AES, + FEATURE_F16C, + FEATURE_BMI, + FEATURE_BMI2 }; struct __processor_model @@ -259,6 +263,8 @@ get_available_features (unsigned int ecx, unsigned int edx, features |= (1 << FEATURE_SSE2); if (ecx & bit_POPCNT) features |= (1 << FEATURE_POPCNT); + if (ecx & bit_AES) + features |= (1 << FEATURE_AES); if (ecx & bit_SSE3) features |= (1 << FEATURE_SSE3); if (ecx & bit_SSSE3) @@ -271,14 +277,20 @@ get_available_features (unsigned int ecx, unsigned int edx, features |= (1 << FEATURE_AVX); if (ecx & bit_FMA) features |= (1 << FEATURE_FMA); + if (ecx & bit_F16C) + features |= (1 << FEATURE_F16C); /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */ if (max_cpuid_level >= 7) { unsigned int eax, ebx, ecx, edx; __cpuid_count (7, 0, eax, ebx, ecx, edx); + if (ebx & bit_BMI) + features |= (1 << FEATURE_BMI); if (ebx & bit_AVX2) features |= (1 << FEATURE_AVX2); + if (ebx & bit_BMI2) + features |= (1 << FEATURE_BMI2); if (ebx & bit_AVX512F) features |= (1 << FEATURE_AVX512F); }