[PATCH] D29386: Clzero flag addition and inclusion under znver1
This revision was automatically updated to reflect the committed changes. Closed by commit rL294559: [X86] Clzero flag addition and inclusion under znver1 (authored by ctopper). Changed prior to commit: https://reviews.llvm.org/D29386?vs=87437&id=87762#toc Repository: rL LLVM https://reviews.llvm.org/D29386 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/include/clang/Driver/Options.td cfe/trunk/lib/Basic/Targets.cpp cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/clzerointrin.h cfe/trunk/lib/Headers/module.modulemap cfe/trunk/lib/Headers/x86intrin.h cfe/trunk/test/CodeGen/builtin-clflushopt.c cfe/trunk/test/CodeGen/builtin-clzero.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/Driver/x86-target-features.c cfe/trunk/test/Preprocessor/predefined-arch-macros.c Index: cfe/trunk/include/clang/Driver/Options.td === --- cfe/trunk/include/clang/Driver/Options.td +++ cfe/trunk/include/clang/Driver/Options.td @@ -1714,6 +1714,7 @@ def mno_xsavec : Flag<["-"], "mno-xsavec">, Group<m_x86_Features_Group>; def mno_xsaves : Flag<["-"], "mno-xsaves">, Group<m_x86_Features_Group>; def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group<m_x86_Features_Group>; +def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>; def mno_pku : Flag<["-"], "mno-pku">, Group<m_x86_Features_Group>; def mno_clflushopt : Flag<["-"], "mno-clflushopt">, Group<m_x86_Features_Group>; def mno_clwb : Flag<["-"], "mno-clwb">, Group<m_x86_Features_Group>; @@ -1913,6 +1914,7 @@ def mxsavec : Flag<["-"], "mxsavec">, Group<m_x86_Features_Group>; def mxsaves : Flag<["-"], "mxsaves">, Group<m_x86_Features_Group>; def mmwaitx : Flag<["-"], "mmwaitx">, Group<m_x86_Features_Group>; +def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>; def mclflushopt : Flag<["-"], "mclflushopt">, Group<m_x86_Features_Group>; def mclwb : Flag<["-"], "mclwb">, Group<m_x86_Features_Group>; def mmovbe : Flag<["-"], "mmovbe">, Group<m_x86_Features_Group>; Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def === --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -1814,6 +1814,9 @@ TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx") TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx") +// CLZERO 
+TARGET_BUILTIN(__builtin_ia32_clzero, "vv*", "", "clzero") + // MSVC TARGET_HEADER_BUILTIN(_BitScanForward, "UcULi*ULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanReverse, "UcULi*ULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") Index: cfe/trunk/test/Preprocessor/predefined-arch-macros.c === --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c @@ -1883,6 +1883,7 @@ // CHECK_ZNVER1_M32: #define __BMI2__ 1 // CHECK_ZNVER1_M32: #define __BMI__ 1 // CHECK_ZNVER1_M32: #define __CLFLUSHOPT__ 1 +// CHECK_ZNVER1_M32: #define __CLZERO__ 1 // CHECK_ZNVER1_M32: #define __F16C__ 1 // CHECK_ZNVER1_M32: #define __FMA__ 1 // CHECK_ZNVER1_M32: #define __FSGSBASE__ 1 @@ -1924,6 +1925,7 @@ // CHECK_ZNVER1_M64: #define __BMI2__ 1 // CHECK_ZNVER1_M64: #define __BMI__ 1 // CHECK_ZNVER1_M64: #define __CLFLUSHOPT__ 1 +// CHECK_ZNVER1_M64: #define __CLZERO__ 1 // CHECK_ZNVER1_M64: #define __F16C__ 1 // CHECK_ZNVER1_M64: #define __FMA__ 1 // CHECK_ZNVER1_M64: #define __FSGSBASE__ 1 Index: cfe/trunk/test/CodeGen/builtin-clzero.c === --- cfe/trunk/test/CodeGen/builtin-clzero.c +++ cfe/trunk/test/CodeGen/builtin-clzero.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +clzero -emit-llvm -o - -Wall -Werror | FileCheck %s +#define __MM_MALLOC_H + +#include +void test_mm_clzero(void * __m) { + //CHECK-LABEL: @test_mm_clzero + //CHECK: @llvm.x86.clflushopt + _mm_clzero(__m); +} Index: cfe/trunk/test/CodeGen/builtins-x86.c === --- cfe/trunk/test/CodeGen/builtins-x86.c +++ cfe/trunk/test/CodeGen/builtins-x86.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -emit-llvm -o %t %s -// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt 
-target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -fsyntax-only -o %t %s +// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -emit-llvm -o %t %s +// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -fsyntax-only -o %t %s #ifdef USE_ALL #define USE_3DNOW @@ -285,6
[PATCH] D29386: Clzero flag addition and inclusion under znver1
GGanesh added a comment. Thank you @craig.topper. Just want to check if the patch can be committed to the 4.0 release branch as well. I understand that the trunk is in the blocker bug-fix stage, but I just want to get it confirmed. Repository: rL LLVM https://reviews.llvm.org/D29386 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D29386: Clzero flag addition and inclusion under znver1
craig.topper accepted this revision. craig.topper added a comment. This revision is now accepted and ready to land. LGTM, but I'll make some additional testing changes before committing it: - Add __CLZERO__ checks to test/Preprocessor/predefined-arch-macros.c for znver1. - Add command line testing to test/Driver/x86-target-features.c. - Add a test to verify the generated IR. Repository: rL LLVM https://reviews.llvm.org/D29386 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D29386: Clzero flag addition and inclusion under znver1
GGanesh updated this revision to Diff 87437. GGanesh added a comment. Updated the builtins test for "__builtin_ia32_clzero" Repository: rL LLVM https://reviews.llvm.org/D29386 Files: include/clang/Basic/BuiltinsX86.def include/clang/Driver/Options.td lib/Basic/Targets.cpp lib/Headers/CMakeLists.txt lib/Headers/clzerointrin.h lib/Headers/module.modulemap lib/Headers/x86intrin.h test/CodeGen/builtins-x86.c Index: test/CodeGen/builtins-x86.c === --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -emit-llvm -o %t %s -// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -fsyntax-only -o %t %s +// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -emit-llvm -o %t %s +// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -fsyntax-only -o %t %s #ifdef USE_ALL #define USE_3DNOW @@ -285,6 +285,7 @@ (void) __builtin_ia32_monitorx(tmp_vp, tmp_Ui, tmp_Ui); (void) __builtin_ia32_mwaitx(tmp_Ui, tmp_Ui, tmp_Ui); + (void) __builtin_ia32_clzero(tmp_vp); tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i); tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f); Index: lib/Headers/x86intrin.h === --- lib/Headers/x86intrin.h +++ lib/Headers/x86intrin.h @@ -80,6 +80,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__) +#include +#endif + /* FIXME: LWP 
*/ #endif /* __X86INTRIN_H */ Index: lib/Headers/module.modulemap === --- lib/Headers/module.modulemap +++ lib/Headers/module.modulemap @@ -61,6 +61,7 @@ textual header "xopintrin.h" textual header "fma4intrin.h" textual header "mwaitxintrin.h" +textual header "clzerointrin.h" explicit module mm_malloc { requires !freestanding Index: lib/Headers/clzerointrin.h === --- lib/Headers/clzerointrin.h +++ lib/Headers/clzerointrin.h @@ -0,0 +1,50 @@ +/*===--- clzerointrin.h - CLZERO --=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===---=== + */ +#ifndef __X86INTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef _CLZEROINTRIN_H +#define _CLZEROINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("clzero"))) + +/// \brief Loads the cache line address and zeros out the cache line +/// +/// \headerfile <clzerointrin.h> +/// +/// This intrinsic corresponds to the CLZERO instruction. +/// +/// \param __line +///A pointer to a cache line which needs to be zeroed out. +static __inline__ void __DEFAULT_FN_ATTRS +_mm_clzero (void * __line) +{ + __builtin_ia32_clzero ((void *)__line); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* _CLZEROINTRIN_H */ Index: lib/Headers/CMakeLists.txt === --- lib/Headers/CMakeLists.txt +++ lib/Headers/CMakeLists.txt @@ -28,6 +28,7 @@
[PATCH] D29386: Clzero flag addition and inclusion under znver1
RKSimon added a comment. Still missing a test in llvm\tools\clang\test\CodeGen\builtins-x86.c Repository: rL LLVM https://reviews.llvm.org/D29386 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D29386: Clzero flag addition and inclusion under znver1
GGanesh updated this revision to Diff 87386. GGanesh added a comment. Updated for review comments. Repository: rL LLVM https://reviews.llvm.org/D29386 Files: include/clang/Basic/BuiltinsX86.def include/clang/Driver/Options.td lib/Basic/Targets.cpp lib/Headers/CMakeLists.txt lib/Headers/clzerointrin.h lib/Headers/module.modulemap lib/Headers/x86intrin.h Index: lib/Headers/x86intrin.h === --- lib/Headers/x86intrin.h +++ lib/Headers/x86intrin.h @@ -80,6 +80,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__) +#include +#endif + /* FIXME: LWP */ #endif /* __X86INTRIN_H */ Index: lib/Headers/module.modulemap === --- lib/Headers/module.modulemap +++ lib/Headers/module.modulemap @@ -61,6 +61,7 @@ textual header "xopintrin.h" textual header "fma4intrin.h" textual header "mwaitxintrin.h" +textual header "clzerointrin.h" explicit module mm_malloc { requires !freestanding Index: lib/Headers/clzerointrin.h === --- lib/Headers/clzerointrin.h +++ lib/Headers/clzerointrin.h @@ -0,0 +1,50 @@ +/*===--- clzerointrin.h - CLZERO --=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===---=== + */ +#ifndef __X86INTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef _CLZEROINTRIN_H +#define _CLZEROINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("clzero"))) + +/// \brief Loads the cache line address and zero's out the cacheline +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CLZERO instruction. +/// +/// \param __line +///A pointer to a cacheline which needs to be zeroed out. +static __inline__ void __DEFAULT_FN_ATTRS +_mm_clzero (void * __line) +{ + __builtin_ia32_clzero ((void *)__line); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* _CLZEROINTRIN_H */ Index: lib/Headers/CMakeLists.txt === --- lib/Headers/CMakeLists.txt +++ lib/Headers/CMakeLists.txt @@ -28,6 +28,7 @@ __clang_cuda_intrinsics.h __clang_cuda_math_forward_declares.h __clang_cuda_runtime_wrapper.h + clzerointrin.h cpuid.h clflushoptintrin.h emmintrin.h Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -2489,6 +2489,7 @@ bool HasXSAVEC = false; bool HasXSAVES = false; bool HasMWAITX = false; + bool HasCLZERO = false; bool HasPKU = false; bool HasCLFLUSHOPT = false; bool HasPCOMMIT = false; @@ -3205,6 +3206,7 @@ setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "clflushopt", true); +setFeatureEnabledImpl(Features, "clzero", true); setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "fma", true); Index: include/clang/Driver/Options.td === --- 
include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -1714,6 +1714,7 @@ def mno_xsavec : Flag<["-"], "mno-xsavec">, Group; def mno_xsaves : Flag<["-"], "mno-xsaves">, Group; def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group; +def mno_clzero : Flag<["-"], "mno-clzero">, Group; def mno_pku : Flag<["-"], "mno-pku">, Group; def munaligned_access : Flag<["-"], "munaligned-access">, Group, @@ -1907,6 +1908,7 @@ def mxsavec : Flag<["-"], "mxsavec">, Group; def