Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics
RKSimon updated this revision to Diff 32514. RKSimon added a comment. Added ia32 builtin undef intrinsics (I didn't bother with the mmx as I can't find any evidence of an undefined intrinsic for it). Added the avx512 intrinsics referenced in the intel intrinsics guide. Technically there's nothing stopping us making these builtin more general (non x86 specific) - I don't know if people want us to go that way though? I'll make the tests more explicit once we're happy that this is the way to go. Repository: rL LLVM http://reviews.llvm.org/D12052 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/avx512fintrin.h lib/Headers/avxintrin.h lib/Headers/emmintrin.h lib/Headers/xmmintrin.h test/CodeGen/sse-undefined.c Index: test/CodeGen/sse-undefined.c === --- test/CodeGen/sse-undefined.c +++ test/CodeGen/sse-undefined.c @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep xmm +// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep ymm +// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep zmm + +// Don't include mm_malloc.h, it's system specific. +#define __MM_MALLOC_H + +#include x86intrin.h + +__m128 test_mm_undefined_ps() { + return _mm_undefined_ps(); +} + +__m128d test_mm_undefined_pd() { + return _mm_undefined_pd(); +} + +__m128i test_mm_undefined_si128() { + return _mm_undefined_si128(); +} + +__m256 test_mm256_undefined_ps() { + return _mm256_undefined_ps(); +} + +__m256d test_mm256_undefined_pd() { + return _mm256_undefined_pd(); +} + +__m256i test_mm256_undefined_si256() { + return _mm256_undefined_si256(); +} + +__m512 test_mm512_undefined() { + return _mm512_undefined(); +} + +__m512 test_mm512_undefined_ps() { + return _mm512_undefined_ps(); +} + +__m512d test_mm512_undefined_pd() { + return _mm512_undefined_pd(); +} + +__m512i test_mm512_undefined_epi32() { + return _mm512_undefined_epi32(); +} + Index: lib/Headers/xmmintrin.h === --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -577,6 +577,12 @@ } static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_undefined_ps() +{ + return (__m128)__builtin_ia32_undef128(); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ss(float __w) { return (__m128){ __w, 0, 0, 0 }; Index: lib/Headers/emmintrin.h === --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -523,6 +523,12 @@ } static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_undefined_pd() +{ + return (__m128d)__builtin_ia32_undef128(); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { return (__m128d){ __w, 0 }; @@ -1116,6 +1122,12 @@ } static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_undefined_si128() +{ + return (__m128i)__builtin_ia32_undef128(); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long q1, long long q0) { return (__m128i){ q0, q1 }; Index: lib/Headers/avxintrin.h === --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -900,6 +900,24 @@ } /* Create vectors */ +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_undefined_pd() +{ + return (__m256d)__builtin_ia32_undef256(); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_undefined_ps() +{ + return (__m256)__builtin_ia32_undef256(); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_undefined_si256() +{ + return (__m256i)__builtin_ia32_undef256(); +} + static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd(double __a, double __b, double __c, double __d) { Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -57,6 +57,30 @@ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_undefined_pd() +{ + return (__m512d)__builtin_ia32_undef512(); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_undefined() +{ + return (__m512)__builtin_ia32_undef512(); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_undefined_ps() +{ + return (__m512)__builtin_ia32_undef512(); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_undefined_epi32() +{ + return (__m512i)__builtin_ia32_undef512(); +} + static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_set1_epi32(__mmask16 __M, int __A) { Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -6090,6 +6090,10 @@ Value *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } + case X86::BI__builtin_ia32_undef128: + case
RE: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics
I’m not sure how much people actually use these, but the AVX-512 versions of these, at least, can be very useful internally to implement AVX-512 intrinsics. For AVX-512, we use the same GCC builtin for all 3 versions of the intrinsic (pass-through masked, set to zero masked, and unmasked). This is the same implementation that’s used in GCC, and is fairly clean, since the only difference is in the desired pass-through values (actual value, zero, or undef). However, since we don’t actually have the undef intrinsics right now, we put a zero in the unmasked version as well, which is definitely a pessimization. The plan is to change them to use undef once the undef intrinsics are implemented. From: Eric Christopher [mailto:echri...@gmail.com] Sent: Monday, August 17, 2015 21:33 To: reviews+d12052+public+a6057f04f570e...@reviews.llvm.org; llvm-...@redking.me.uk; craig.top...@gmail.com; Kuperstein, Michael M Cc: david.majne...@gmail.com; Badouh, Asaf; cfe-commits@lists.llvm.org; Richard Smith Subject: Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics On Sun, Aug 16, 2015 at 3:05 AM Simon Pilgrim llvm-...@redking.me.ukmailto:llvm-...@redking.me.uk wrote: RKSimon added a comment. Yes using that uninitialized value has worried me as well. I originally set it to zero (and considered using __ LINE __ or __ COUNTER __) but both introduce defined behaviour that I could see causing all sorts of problems further down the line in debug vs release builds. How undefined do we want our undefined to be? ;-) Yeah, this is why I hadn't implemented them yet either. I can create __builtin_ia32_undef64mmx / __builtin_ia32_undef128 / __builtin_ia32_undef256 / __builtin_ia32_undef512 if nobody can think of a better alternative? This seems fairly heavyweight, but I don't have any better ideas. I'll assume we don't want to try to expose undef as a value in C (making it as something we could just add), if not then this seems to make the most sense. It's pretty painful/ugly though. Are people using these or did they just notice for completeness? We probably _could_ define them to zero and leave it at that. It's not pleasant and slower than it needs to be but not crazy. -eric Repository: rL LLVM http://reviews.llvm.org/D12052 - Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics
On Sun, Aug 16, 2015 at 3:05 AM Simon Pilgrim llvm-...@redking.me.uk wrote: RKSimon added a comment. Yes using that uninitialized value has worried me as well. I originally set it to zero (and considered using __ LINE __ or __ COUNTER __) but both introduce defined behaviour that I could see causing all sorts of problems further down the line in debug vs release builds. How undefined do we want our undefined to be? ;-) Yeah, this is why I hadn't implemented them yet either. I can create __builtin_ia32_undef64mmx / __builtin_ia32_undef128 / __builtin_ia32_undef256 / __builtin_ia32_undef512 if nobody can think of a better alternative? This seems fairly heavyweight, but I don't have any better ideas. I'll assume we don't want to try to expose undef as a value in C (making it as something we could just add), if not then this seems to make the most sense. It's pretty painful/ugly though. Are people using these or did they just notice for completeness? We probably _could_ define them to zero and leave it at that. It's not pleasant and slower than it needs to be but not crazy. -eric Repository: rL LLVM http://reviews.llvm.org/D12052 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics
mkuper added a comment. Thanks, Simon! I've wanted to add the _undefined intrinsics for a while now, but never got to it. Anyway, this sort of implementation somewhat worries me. Yes, I know that the gcc intrinsics do something very similar. And I also know that in practice we'll get an undef value, nothing worse (assuming reading an uninitialized automatic variable is undefined behavior to begin with - which really depends on the spec interpretation :-) ). And I know this isn't likely to change anytime soon. Still, relying on what may be undefined behavior in the header files worries me, and I'd rather not have it implemented like that. I was thinking about adding a __builtin_undef which explicitly resolves to an undef value. Does that make sense to you? Comment at: test/CodeGen/sse-undefined.c:1 @@ +1,2 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep xmm +// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep ymm Perhaps a more explicit test? Repository: rL LLVM http://reviews.llvm.org/D12052 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics
RKSimon added a comment. Yes using that uninitialized value has worried me as well. I originally set it to zero (and considered using __ LINE __ or __ COUNTER __) but both introduce defined behaviour that I could see causing all sorts of problems further down the line in debug vs release builds. How undefined do we want our undefined to be? ;-) I can create __builtin_ia32_undef64mmx / __builtin_ia32_undef128 / __builtin_ia32_undef256 / __builtin_ia32_undef512 if nobody can think of a better alternative? Repository: rL LLVM http://reviews.llvm.org/D12052 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics
RKSimon created this revision. RKSimon added reviewers: craig.topper, echristo, mkuper. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. Adds missing SSE/AVX 'undefined' intrinsics (PR24040): _mm_undefined_pd + _mm256_undefined_pd _mm_undefined_ps + _mm256_undefined_ps _mm_undefined_si128 + _mm256_undefined_si256 Repository: rL LLVM http://reviews.llvm.org/D12052 Files: lib/Headers/avxintrin.h lib/Headers/emmintrin.h lib/Headers/xmmintrin.h test/CodeGen/sse-undefined.c Index: test/CodeGen/sse-undefined.c === --- test/CodeGen/sse-undefined.c +++ test/CodeGen/sse-undefined.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep xmm +// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep ymm + +// Don't include mm_malloc.h, it's system specific. +#define __MM_MALLOC_H + +#include x86intrin.h + +__m128 test_mm_undefined_ps() { + return _mm_undefined_ps(); +} + +__m128d test_mm_undefined_pd() { + return _mm_undefined_pd(); +} + +__m128i test_mm_undefined_si128() { + return _mm_undefined_si128(); +} + +__m256 test_mm256_undefined_ps() { + return _mm256_undefined_ps(); +} + +__m256d test_mm256_undefined_pd() { + return _mm256_undefined_pd(); +} + +__m256i test_mm256_undefined_si256() { + return _mm256_undefined_si256(); +} Index: lib/Headers/xmmintrin.h === --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -577,6 +577,13 @@ } static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_undefined_ps() +{ + __m128 __u; + return __builtin_shufflevector(__u, __u, -1, -1, -1, -1); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ss(float __w) { return (__m128){ __w, 0, 0, 0 }; Index: lib/Headers/emmintrin.h === --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -523,6 +523,13 @@ } static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_undefined_pd() +{ + __m128d __u; + return __builtin_shufflevector(__u, __u, -1, -1); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { return (__m128d){ __w, 0 }; @@ -1116,6 +1123,13 @@ } static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_undefined_si128() +{ + __m128i __u; + return __builtin_shufflevector(__u, __u, -1, -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long q1, long long q0) { return (__m128i){ q0, q1 }; Index: lib/Headers/avxintrin.h === --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -900,6 +900,27 @@ } /* Create vectors */ +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_undefined_pd() +{ + __m256d __u; + return __builtin_shufflevector(__u, __u, -1, -1, -1, -1); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_undefined_ps() +{ + __m256 __u; + return __builtin_shufflevector(__u, __u, -1, -1, -1, -1, -1, -1, -1, -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_undefined_si256() +{ + __m256i __u; + return __builtin_shufflevector(__u, __u, -1, -1, -1, -1); +} + static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd(double __a, double __b, double __c, double __d) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits