Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)
This revision was automatically updated to reflect the committed changes. Closed by commit rL273311: [x86] AVX FP compare builtins should require AVX target feature (PR28112) (authored by spatel). Changed prior to commit: http://reviews.llvm.org/D21306?vs=60596=61437#toc Repository: rL LLVM http://reviews.llvm.org/D21306 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/test/CodeGen/target-features-error-2.c Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def === --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -219,16 +219,14 @@ TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "", "sse") @@ -242,16 +240,14 @@ TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "", "sse") -TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "", "sse2") @@ -453,8 +449,12 @@ TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "", "avx") TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") Index: cfe/trunk/test/CodeGen/target-features-error-2.c === --- cfe/trunk/test/CodeGen/target-features-error-2.c +++ cfe/trunk/test/CodeGen/target-features-error-2.c @@ -1,7 +1,38 @@ -// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_SSE42 +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_AVX_1 +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_AVX_2 +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_AVX_3 +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_AVX_4 + #define __MM_MALLOC_H #include +#if NEED_SSE42 int baz(__m256i a) { return _mm256_extract_epi32(a, 3); // expected-error {{always_inline function '_mm256_extract_epi32' requires target feature 'sse4.2', but would be inlined into function 'baz' that is compiled without support for 'sse4.2'}} } +#endif + +#if NEED_AVX_1 +__m128 need_avx(__m128 a, __m128 b) { + return _mm_cmp_ps(a, b, 0); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}} +} +#endif + +#if NEED_AVX_2 +__m128 need_avx(__m128 a, __m128 b) { + return _mm_cmp_ss(a, b, 0); // expected-error {{'__builtin_ia32_cmpss' needs target feature avx}} +} +#endif + +#if NEED_AVX_3 +__m128d need_avx(__m128d a, __m128d b) { + return _mm_cmp_pd(a, b, 0); // expected-error {{'__builtin_ia32_cmppd' needs target feature avx}} +} +#endif
Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)
RKSimon accepted this revision. RKSimon added a comment. This revision is now accepted and ready to land. LGTM - the compile warning is clear and it could be a problem if we allow undefined values through on pre-AVX targets. The only other thing we could do is handle these in CGBuiltin and 'accept' 0-7 values through on sse/sse2 targets and assert on other values but I don't see how this would be better. http://reviews.llvm.org/D21306 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)
RKSimon added a comment. It seems like part of the need for this is because the _mm_cmp_ps style intrinsics are defined as macros (to get around the problem of trying to use an immediate as an argument): #define _mm_cmp_ps(a, b, c) __extension__ ({ \ (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (c)); }) which means clang can't use a __target__("avx") attribute to stop their use. Given that I'm happy with this patch's approach - anyone else have any suggestions? http://reviews.llvm.org/D21306 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)
spatel added a comment. In http://reviews.llvm.org/D21306#456965, @echristo wrote: > The 128 bit versions should only be selecting for sse functions and shouldn't > need avx to work? What instructions are getting emitted here? No, the 128-bit versions of these C intrinsics are strictly for AVX versions of the instructions (eg, vcmpps). Example from the bug report: cmp = _mm_cmp_ps((__m128)a, (__m128)b, 8); We're currently emitting illegal SSE instructions like: cmpps $0x8, %xmm1, %xmm0 <--- anything over '7' is reserved/undef http://reviews.llvm.org/D21306 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)
echristo added a comment. The 128 bit versions should only be selecting for sse functions and shouldn't need avx to work? What instructions are getting emitted here? http://reviews.llvm.org/D21306 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)
spatel created this revision. spatel added reviewers: echristo, bogner, RKSimon. spatel added a subscriber: cfe-commits. Herald added subscribers: mehdi_amini, mcrosier. This is a fix for PR28112: https://llvm.org/bugs/show_bug.cgi?id=28112 The FP comparison intrinsics that take an immediate parameter rather than specifying a comparison predicate in the function name were added with AVX (these are macros in avxintrin.h). This makes clang behave more like like gcc and matches the Intel documentation, eg: VCMPPS: __m128 _mm_cmp_ps(__m128 a, __m128 b, const int imm) 'V' means this is intended to only work with the AVX form of the instruction. http://reviews.llvm.org/D21306 Files: include/clang/Basic/BuiltinsX86.def test/CodeGen/target-features-error-2.c Index: test/CodeGen/target-features-error-2.c === --- test/CodeGen/target-features-error-2.c +++ test/CodeGen/target-features-error-2.c @@ -1,7 +1,38 @@ -// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_SSE42 +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_AVX_1 +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_AVX_2 +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_AVX_3 +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o - -D NEED_AVX_4 + #define __MM_MALLOC_H #include +#if NEED_SSE42 int baz(__m256i a) { return _mm256_extract_epi32(a, 3); // expected-error {{always_inline function '_mm256_extract_epi32' requires target feature 'sse4.2', but would be inlined into function 'baz' that is compiled without support for 'sse4.2'}} } +#endif + +#if NEED_AVX_1 +__m128 need_avx(__m128 a, __m128 b) { + return _mm_cmp_ps(a, b, 0); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}} +} +#endif + +#if NEED_AVX_2 +__m128 need_avx(__m128 a, __m128 b) { + return _mm_cmp_ss(a, b, 0); // expected-error {{'__builtin_ia32_cmpss' needs target feature avx}} +} +#endif + +#if NEED_AVX_3 +__m128d need_avx(__m128d a, __m128d b) { + return _mm_cmp_pd(a, b, 0); // expected-error {{'__builtin_ia32_cmppd' needs target feature avx}} +} +#endif + +#if NEED_AVX_4 +__m128d need_avx(__m128d a, __m128d b) { + return _mm_cmp_sd(a, b, 0); // expected-error {{'__builtin_ia32_cmpsd' needs target feature avx}} +} +#endif Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -219,16 +219,14 @@ TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "", "sse") @@ -242,16 +240,14 @@ TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "", "sse") -TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "", "sse2") @@ -453,8 +449,12 @@ TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "", "avx") TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "", "avx")