Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-30 Thread Simon Pilgrim via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL271218: [X86][SSE] _mm_store1_ps/_mm_store1_pd should 
require an aligned pointer (authored by RKSimon).

Changed prior to commit:
  http://reviews.llvm.org/D20617?vs=58397=58979#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D20617

Files:
  cfe/trunk/lib/Headers/emmintrin.h
  cfe/trunk/lib/Headers/xmmintrin.h
  cfe/trunk/test/CodeGen/sse2-builtins.c

Index: cfe/trunk/test/CodeGen/sse2-builtins.c
===
--- cfe/trunk/test/CodeGen/sse2-builtins.c
+++ cfe/trunk/test/CodeGen/sse2-builtins.c
@@ -1205,6 +1205,13 @@
   _mm_store_pd(A, B);
 }
 
+void test_mm_store_pd1(double* x, __m128d y) {
+  // CHECK-LABEL: test_mm_store_pd1
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x 
i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+  _mm_store_pd1(x, y);
+}
+
 void test_mm_store_sd(double* A, __m128d B) {
   // CHECK-LABEL: test_mm_store_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@
 
 void test_mm_store1_pd(double* x, __m128d y) {
   // CHECK-LABEL: test_mm_store1_pd
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x 
i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
   _mm_store1_pd(x, y);
 }
 
Index: cfe/trunk/lib/Headers/emmintrin.h
===
--- cfe/trunk/lib/Headers/emmintrin.h
+++ cfe/trunk/lib/Headers/emmintrin.h
@@ -588,19 +588,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+  *(__m128d*)__dp = __a;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
-  struct __mm_store1_pd_struct {
-double __u[2];
-  } __attribute__((__packed__, __may_alias__));
-  ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
-  ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
+  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
+  _mm_store_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {
-  *(__m128d *)__dp = __a;
+  return _mm_store1_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
Index: cfe/trunk/lib/Headers/xmmintrin.h
===
--- cfe/trunk/lib/Headers/xmmintrin.h
+++ cfe/trunk/lib/Headers/xmmintrin.h
@@ -1593,22 +1593,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store1_ps(float *__p, __m128 __a)
+_mm_store_ps(float *__p, __m128 __a)
 {
-  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
-  _mm_storeu_ps(__p, __a);
+  *(__m128*)__p = __a;
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps1(float *__p, __m128 __a)
+_mm_store1_ps(float *__p, __m128 __a)
 {
-return _mm_store1_ps(__p, __a);
+  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
+  _mm_store_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps(float *__p, __m128 __a)
+_mm_store_ps1(float *__p, __m128 __a)
 {
-  *(__m128 *)__p = __a;
+  return _mm_store1_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS


Index: cfe/trunk/test/CodeGen/sse2-builtins.c
===
--- cfe/trunk/test/CodeGen/sse2-builtins.c
+++ cfe/trunk/test/CodeGen/sse2-builtins.c
@@ -1205,6 +1205,13 @@
   _mm_store_pd(A, B);
 }
 
+void test_mm_store_pd1(double* x, __m128d y) {
+  // CHECK-LABEL: test_mm_store_pd1
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+  _mm_store_pd1(x, y);
+}
+
 void test_mm_store_sd(double* A, __m128d B) {
   // CHECK-LABEL: test_mm_store_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@
 
 void test_mm_store1_pd(double* x, __m128d y) {
   // CHECK-LABEL: test_mm_store1_pd
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
   _mm_store1_pd(x, y);
 }
 
Index: cfe/trunk/lib/Headers/emmintrin.h
===
--- cfe/trunk/lib/Headers/emmintrin.h
+++ cfe/trunk/lib/Headers/emmintrin.h
@@ -588,19 +588,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+  

Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-27 Thread Simon Pilgrim via cfe-commits
RKSimon added inline comments.


Comment at: lib/Headers/emmintrin.h:598
@@ -594,3 +597,3 @@
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {

majnemer wrote:
> You could use `__attribute__((align_value(16)))` no?
Technically yes but AFAICT there are no other users of this approach in the 
headers - is it something that we should be encouraging do you think?

Craig - I think you wrote in a commit about dropping the unaligned intrinsics, 
is that how you'd do it? 


Repository:
  rL LLVM

http://reviews.llvm.org/D20617



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-25 Thread David Majnemer via cfe-commits
majnemer added a subscriber: majnemer.


Comment at: lib/Headers/emmintrin.h:598
@@ -594,3 +597,3 @@
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {

You could use `__attribute__((align_value(16)))` no?


Repository:
  rL LLVM

http://reviews.llvm.org/D20617



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-25 Thread Craig Topper via cfe-commits
craig.topper accepted this revision.
craig.topper added a comment.
This revision is now accepted and ready to land.

Given that its documented as being aligned. I'm ok with it. LGTM


Repository:
  rL LLVM

http://reviews.llvm.org/D20617



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-25 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20617#439200, @craig.topper wrote:

> Can you double check gcc's xmmintrin.h again. I'm pretty sure _mm_store1_ps
>  is calling _mm_storeu_ps.


Yes you're right - for gcc _mm_store1_pd is aligned (and there is a comment 
saying it must be), but _mm_store1_ps is unaligned. The intel intrinsics docs 
and msvc codegen both set both ps and pd versions to aligned store though.

If you wish I can just do the pd fixes - we are alone in doing a extract + 
2*movsd - the rest all use shufpd+movapd

Suggestions for ps?


Repository:
  rL LLVM

http://reviews.llvm.org/D20617



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-25 Thread Craig Topper via cfe-commits
Can you double check gcc's xmmintrin.h again. I'm pretty sure _mm_store1_ps
is calling _mm_storeu_ps.

On Wed, May 25, 2016 at 3:31 AM, Simon Pilgrim 
wrote:

> RKSimon created this revision.
> RKSimon added reviewers: craig.topper, spatel, andreadb.
> RKSimon added a subscriber: cfe-commits.
> RKSimon set the repository for this revision to rL LLVM.
>
> According to the gcc headers, intel intrinsics docs and msdn codegen the
> _mm_store1_ps/_mm_store1_pd (and their _mm_store_ps1/_mm_store_pd1
> analogues) should require an aligned pointer - the clang headers are the
> only implementation I can find that assume non-aligned stores (by storing
> with _mm_storeu_ps/_mm_storeu_pd).
>
> This patch raises the alignment requirements to match the other
> implementations by calling _mm_store_ps/_mm_store_pd instead.
>
> I've also added the missing _mm_store_pd1 intrinsic (which maps to
> _mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps).
>
> As a followup I'll update the llvm fast-isel tests to match this codegen.
>
> Repository:
>   rL LLVM
>
> http://reviews.llvm.org/D20617
>
> Files:
>   lib/Headers/emmintrin.h
>   lib/Headers/xmmintrin.h
>   test/CodeGen/sse-builtins.c
>   test/CodeGen/sse2-builtins.c
>
>


-- 
~Craig
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-25 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: craig.topper, spatel, andreadb.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

According to the gcc headers, intel intrinsics docs and msdn codegen the 
_mm_store1_ps/_mm_store1_pd (and their _mm_store_ps1/_mm_store_pd1 analogues) 
should require an aligned pointer - the clang headers are the only 
implementation I can find that assume non-aligned stores (by storing with 
_mm_storeu_ps/_mm_storeu_pd).

This patch raises the alignment requirements to match the other implementations 
by calling _mm_store_ps/_mm_store_pd instead.

I've also added the missing _mm_store_pd1 intrinsic (which maps to 
_mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps).

As a followup I'll update the llvm fast-isel tests to match this codegen.

Repository:
  rL LLVM

http://reviews.llvm.org/D20617

Files:
  lib/Headers/emmintrin.h
  lib/Headers/xmmintrin.h
  test/CodeGen/sse-builtins.c
  test/CodeGen/sse2-builtins.c

Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -1205,6 +1205,13 @@
   _mm_store_pd(A, B);
 }
 
+void test_mm_store_pd1(double* x, __m128d y) {
+  // CHECK-LABEL: test_mm_store_pd1
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+  _mm_store_pd1(x, y);
+}
+
 void test_mm_store_sd(double* A, __m128d B) {
   // CHECK-LABEL: test_mm_store_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@
 
 void test_mm_store1_pd(double* x, __m128d y) {
   // CHECK-LABEL: test_mm_store1_pd
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
   _mm_store1_pd(x, y);
 }
 
Index: test/CodeGen/sse-builtins.c
===
--- test/CodeGen/sse-builtins.c
+++ test/CodeGen/sse-builtins.c
@@ -651,7 +651,7 @@
 void test_mm_store_ps1(float* x, __m128 y) {
   // CHECK-LABEL: test_mm_store_ps1
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
-  // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
   _mm_store_ps1(x, y);
 }
 
@@ -665,7 +665,7 @@
 void test_mm_store1_ps(float* x, __m128 y) {
   // CHECK-LABEL: test_mm_store1_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
-  // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
   _mm_store1_ps(x, y);
 }
 
Index: lib/Headers/xmmintrin.h
===
--- lib/Headers/xmmintrin.h
+++ lib/Headers/xmmintrin.h
@@ -1590,22 +1590,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store1_ps(float *__p, __m128 __a)
+_mm_store_ps(float *__p, __m128 __a)
 {
-  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
-  _mm_storeu_ps(__p, __a);
+  *(__m128 *)__p = __a;
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps1(float *__p, __m128 __a)
+_mm_store1_ps(float *__p, __m128 __a)
 {
-return _mm_store1_ps(__p, __a);
+  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
+  _mm_store_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps(float *__p, __m128 __a)
+_mm_store_ps1(float *__p, __m128 __a)
 {
-  *(__m128 *)__p = __a;
+  return _mm_store1_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
Index: lib/Headers/emmintrin.h
===
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -582,19 +582,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+  *(__m128d *)__dp = __a;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
-  struct __mm_store1_pd_struct {
-double __u[2];
-  } __attribute__((__packed__, __may_alias__));
-  ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
-  ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
+  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
+  _mm_store_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {
-  *(__m128d *)__dp = __a;
+  return _mm_store1_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
___