Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics

2015-08-19 Thread Simon Pilgrim via cfe-commits
RKSimon updated this revision to Diff 32514.
RKSimon added a comment.

Added ia32 builtin undef intrinsics (I didn't bother with the mmx as I can't 
find any evidence of an undefined intrinsic for it). Added the avx512 
intrinsics referenced in the intel intrinsics guide.

Technically there's nothing stopping us making these builtin more general (non 
x86 specific) - I don't know if people want us to go that way though?

I'll make the tests more explicit once we're happy that this is the way to go.


Repository:
  rL LLVM

http://reviews.llvm.org/D12052

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/avx512fintrin.h
  lib/Headers/avxintrin.h
  lib/Headers/emmintrin.h
  lib/Headers/xmmintrin.h
  test/CodeGen/sse-undefined.c

Index: test/CodeGen/sse-undefined.c
===
--- test/CodeGen/sse-undefined.c
+++ test/CodeGen/sse-undefined.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep xmm
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep ymm
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep zmm
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include x86intrin.h
+
+__m128 test_mm_undefined_ps() {
+  return _mm_undefined_ps();
+}
+
+__m128d test_mm_undefined_pd() {
+  return _mm_undefined_pd();
+}
+
+__m128i test_mm_undefined_si128() {
+  return _mm_undefined_si128();
+}
+
+__m256 test_mm256_undefined_ps() {
+  return _mm256_undefined_ps();
+}
+
+__m256d test_mm256_undefined_pd() {
+  return _mm256_undefined_pd();
+}
+
+__m256i test_mm256_undefined_si256() {
+  return _mm256_undefined_si256();
+}
+
+__m512 test_mm512_undefined() {
+  return _mm512_undefined();
+}
+
+__m512 test_mm512_undefined_ps() {
+  return _mm512_undefined_ps();
+}
+
+__m512d test_mm512_undefined_pd() {
+  return _mm512_undefined_pd();
+}
+
+__m512i test_mm512_undefined_epi32() {
+  return _mm512_undefined_epi32();
+}
+
Index: lib/Headers/xmmintrin.h
===
--- lib/Headers/xmmintrin.h
+++ lib/Headers/xmmintrin.h
@@ -577,6 +577,12 @@
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_undefined_ps()
+{
+  return (__m128)__builtin_ia32_undef128();
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_set_ss(float __w)
 {
   return (__m128){ __w, 0, 0, 0 };
Index: lib/Headers/emmintrin.h
===
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -523,6 +523,12 @@
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_undefined_pd()
+{
+  return (__m128d)__builtin_ia32_undef128();
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_set_sd(double __w)
 {
   return (__m128d){ __w, 0 };
@@ -1116,6 +1122,12 @@
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_undefined_si128()
+{
+  return (__m128i)__builtin_ia32_undef128();
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set_epi64x(long long q1, long long q0)
 {
   return (__m128i){ q0, q1 };
Index: lib/Headers/avxintrin.h
===
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -900,6 +900,24 @@
 }
 
 /* Create vectors */
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_undefined_pd()
+{
+  return (__m256d)__builtin_ia32_undef256();
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_undefined_ps()
+{
+  return (__m256)__builtin_ia32_undef256();
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_undefined_si256()
+{
+  return (__m256i)__builtin_ia32_undef256();
+}
+
 static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_set_pd(double __a, double __b, double __c, double __d)
 {
Index: lib/Headers/avx512fintrin.h
===
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -57,6 +57,30 @@
   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
 }
 
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_undefined_pd()
+{
+  return (__m512d)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_undefined()
+{
+  return (__m512)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_undefined_ps()
+{
+  return (__m512)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_undefined_epi32()
+{
+  return (__m512i)__builtin_ia32_undef512();
+}
+
 static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
 {
Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -6090,6 +6090,10 @@
 Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
 return Builder.CreateCall(F, {Address, RW, Locality, Data});
   }
+  case X86::BI__builtin_ia32_undef128:
+  case 

RE: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics

2015-08-18 Thread Kuperstein, Michael M via cfe-commits
I’m not sure how much people actually use these, but the AVX-512 versions of 
these, at least, can be very useful internally to implement AVX-512 intrinsics.
For AVX-512, we use the same GCC builtin for all 3 versions of the intrinsic 
(pass-through masked, set to zero masked, and unmasked). This is the same 
implementation that’s used in GCC, and is fairly clean, since the only 
difference is in the desired pass-through values (actual value, zero, or undef).

However, since we don’t actually have the undef intrinsics right now, we put a 
zero in the unmasked version as well, which is definitely a pessimization.
The plan is to change them to use undef once the undef intrinsics are 
implemented.


From: Eric Christopher [mailto:echri...@gmail.com]
Sent: Monday, August 17, 2015 21:33
To: reviews+d12052+public+a6057f04f570e...@reviews.llvm.org; 
llvm-...@redking.me.uk; craig.top...@gmail.com; Kuperstein, Michael M
Cc: david.majne...@gmail.com; Badouh, Asaf; cfe-commits@lists.llvm.org; Richard 
Smith
Subject: Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics


On Sun, Aug 16, 2015 at 3:05 AM Simon Pilgrim 
llvm-...@redking.me.ukmailto:llvm-...@redking.me.uk wrote:
RKSimon added a comment.

Yes using that uninitialized value has worried me as well. I originally set it 
to zero (and considered using __ LINE __ or __ COUNTER __) but both introduce 
defined behaviour that I could see causing all sorts of problems further down 
the line in debug vs release builds. How undefined do we want our undefined to 
be? ;-)

Yeah, this is why I hadn't implemented them yet either.

I can create __builtin_ia32_undef64mmx / __builtin_ia32_undef128 / 
__builtin_ia32_undef256 / __builtin_ia32_undef512 if nobody can think of a 
better alternative?

This seems fairly heavyweight, but I don't have any better ideas. I'll assume 
we don't want to try to expose undef as a value in C (making it as something we 
could just add), if not then this seems to make the most sense. It's pretty 
painful/ugly though.

Are people using these or did they just notice for completeness? We probably 
_could_ define them to zero and leave it at that. It's not pleasant and slower 
than it needs to be but not crazy.

-eric


Repository:
  rL LLVM

http://reviews.llvm.org/D12052


-
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics

2015-08-17 Thread Eric Christopher via cfe-commits
On Sun, Aug 16, 2015 at 3:05 AM Simon Pilgrim llvm-...@redking.me.uk
wrote:

 RKSimon added a comment.

 Yes using that uninitialized value has worried me as well. I originally
 set it to zero (and considered using __ LINE __ or __ COUNTER __) but both
 introduce defined behaviour that I could see causing all sorts of problems
 further down the line in debug vs release builds. How undefined do we want
 our undefined to be? ;-)


Yeah, this is why I hadn't implemented them yet either.


 I can create __builtin_ia32_undef64mmx / __builtin_ia32_undef128 /
 __builtin_ia32_undef256 / __builtin_ia32_undef512 if nobody can think of a
 better alternative?


This seems fairly heavyweight, but I don't have any better ideas. I'll
assume we don't want to try to expose undef as a value in C (making it as
something we could just add), if not then this seems to make the most
sense. It's pretty painful/ugly though.

Are people using these or did they just notice for completeness? We
probably _could_ define them to zero and leave it at that. It's not
pleasant and slower than it needs to be but not crazy.

-eric



 Repository:
   rL LLVM

 http://reviews.llvm.org/D12052




___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics

2015-08-16 Thread Michael Kuperstein via cfe-commits
mkuper added a comment.

Thanks, Simon!
I've wanted to add the _undefined intrinsics for a while now, but never got to 
it.

Anyway, this sort of implementation somewhat worries me. 
Yes, I know that the gcc intrinsics do something very similar. 
And I also know that in practice we'll get an undef value, nothing worse 
(assuming reading an uninitialized automatic variable is undefined behavior to 
begin with - which really depends on the spec interpretation :-) ). 
And I know this isn't likely to change anytime soon.

Still, relying on what may be undefined behavior in the header files worries 
me, and I'd rather not have it implemented like that.
I was thinking about adding a __builtin_undef which explicitly resolves to an 
undef value. 
Does that make sense to you?



Comment at: test/CodeGen/sse-undefined.c:1
@@ +1,2 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep xmm
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep ymm

Perhaps a more explicit test?


Repository:
  rL LLVM

http://reviews.llvm.org/D12052



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics

2015-08-16 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

Yes using that uninitialized value has worried me as well. I originally set it 
to zero (and considered using __ LINE __ or __ COUNTER __) but both introduce 
defined behaviour that I could see causing all sorts of problems further down 
the line in debug vs release builds. How undefined do we want our undefined to 
be? ;-)

I can create __builtin_ia32_undef64mmx / __builtin_ia32_undef128 / 
__builtin_ia32_undef256 / __builtin_ia32_undef512 if nobody can think of a 
better alternative?


Repository:
  rL LLVM

http://reviews.llvm.org/D12052



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D12052: [X86][SSE] Add _mm_undefined_* intrinsics

2015-08-15 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: craig.topper, echristo, mkuper.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

Adds missing SSE/AVX 'undefined' intrinsics (PR24040):

_mm_undefined_pd + _mm256_undefined_pd
_mm_undefined_ps + _mm256_undefined_ps
_mm_undefined_si128 + _mm256_undefined_si256



Repository:
  rL LLVM

http://reviews.llvm.org/D12052

Files:
  lib/Headers/avxintrin.h
  lib/Headers/emmintrin.h
  lib/Headers/xmmintrin.h
  test/CodeGen/sse-undefined.c

Index: test/CodeGen/sse-undefined.c
===
--- test/CodeGen/sse-undefined.c
+++ test/CodeGen/sse-undefined.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep xmm
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -o - | not grep ymm
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include x86intrin.h
+
+__m128 test_mm_undefined_ps() {
+  return _mm_undefined_ps();
+}
+
+__m128d test_mm_undefined_pd() {
+  return _mm_undefined_pd();
+}
+
+__m128i test_mm_undefined_si128() {
+  return _mm_undefined_si128();
+}
+
+__m256 test_mm256_undefined_ps() {
+  return _mm256_undefined_ps();
+}
+
+__m256d test_mm256_undefined_pd() {
+  return _mm256_undefined_pd();
+}
+
+__m256i test_mm256_undefined_si256() {
+  return _mm256_undefined_si256();
+}
Index: lib/Headers/xmmintrin.h
===
--- lib/Headers/xmmintrin.h
+++ lib/Headers/xmmintrin.h
@@ -577,6 +577,13 @@
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_undefined_ps()
+{
+  __m128 __u;
+  return __builtin_shufflevector(__u, __u, -1, -1, -1, -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_set_ss(float __w)
 {
   return (__m128){ __w, 0, 0, 0 };
Index: lib/Headers/emmintrin.h
===
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -523,6 +523,13 @@
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_undefined_pd()
+{
+  __m128d __u;
+  return __builtin_shufflevector(__u, __u, -1, -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_set_sd(double __w)
 {
   return (__m128d){ __w, 0 };
@@ -1116,6 +1123,13 @@
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_undefined_si128()
+{
+  __m128i __u;
+  return __builtin_shufflevector(__u, __u, -1, -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set_epi64x(long long q1, long long q0)
 {
   return (__m128i){ q0, q1 };
Index: lib/Headers/avxintrin.h
===
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -900,6 +900,27 @@
 }
 
 /* Create vectors */
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_undefined_pd()
+{
+  __m256d __u;
+  return __builtin_shufflevector(__u, __u, -1, -1, -1, -1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_undefined_ps()
+{
+  __m256 __u;
+  return __builtin_shufflevector(__u, __u, -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_undefined_si256()
+{
+  __m256i __u;
+  return __builtin_shufflevector(__u, __u, -1, -1, -1, -1);
+}
+
 static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_set_pd(double __a, double __b, double __c, double __d)
 {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits