Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
This revision was automatically updated to reflect the committed changes. Closed by commit rL271218: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer (authored by RKSimon). Changed prior to commit: http://reviews.llvm.org/D20617?vs=58397&id=58979#toc Repository: rL LLVM http://reviews.llvm.org/D20617 Files: cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/test/CodeGen/sse2-builtins.c Index: cfe/trunk/test/CodeGen/sse2-builtins.c === --- cfe/trunk/test/CodeGen/sse2-builtins.c +++ cfe/trunk/test/CodeGen/sse2-builtins.c @@ -1205,6 +1205,13 @@ _mm_store_pd(A, B); } +void test_mm_store_pd1(double* x, __m128d y) { + // CHECK-LABEL: test_mm_store_pd1 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 + _mm_store_pd1(x, y); +} + void test_mm_store_sd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -1220,9 +1227,8 @@ void test_mm_store1_pd(double* x, __m128d y) { // CHECK-LABEL: test_mm_store1_pd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 _mm_store1_pd(x, y); } Index: cfe/trunk/lib/Headers/emmintrin.h === --- cfe/trunk/lib/Headers/emmintrin.h +++ cfe/trunk/lib/Headers/emmintrin.h @@ -588,19 +588,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ + *(__m128d*)__dp = __a; +} + +static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { - struct __mm_store1_pd_struct { -double __u[2]; - } __attribute__((__packed__, __may_alias__)); - ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; - ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; + __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); + _mm_store_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { - *(__m128d *)__dp = __a; + return _mm_store1_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS Index: cfe/trunk/lib/Headers/xmmintrin.h === --- cfe/trunk/lib/Headers/xmmintrin.h +++ cfe/trunk/lib/Headers/xmmintrin.h @@ -1593,22 +1593,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store1_ps(float *__p, __m128 __a) +_mm_store_ps(float *__p, __m128 __a) { - __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); - _mm_storeu_ps(__p, __a); + *(__m128*)__p = __a; } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps1(float *__p, __m128 __a) +_mm_store1_ps(float *__p, __m128 __a) { -return _mm_store1_ps(__p, __a); + __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); + _mm_store_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps(float *__p, __m128 __a) +_mm_store_ps1(float *__p, __m128 __a) { - *(__m128 *)__p = __a; + return _mm_store1_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS Index: cfe/trunk/test/CodeGen/sse2-builtins.c === --- cfe/trunk/test/CodeGen/sse2-builtins.c +++ cfe/trunk/test/CodeGen/sse2-builtins.c @@ -1205,6 +1205,13 @@ _mm_store_pd(A, B); } +void test_mm_store_pd1(double* x, __m128d y) { + // CHECK-LABEL: test_mm_store_pd1 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 + _mm_store_pd1(x, y); +} + void test_mm_store_sd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -1220,9 +1227,8 @@ void test_mm_store1_pd(double* x, __m128d y) { // CHECK-LABEL: test_mm_store1_pd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 _mm_store1_pd(x, y); } Index: cfe/trunk/lib/Headers/emmintrin.h === --- cfe/trunk/lib/Headers/emmintrin.h +++ cfe/trunk/lib/Headers/emmintrin.h @@ -588,19 +588,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ + *(_
Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
RKSimon added inline comments. Comment at: lib/Headers/emmintrin.h:598 @@ -594,3 +597,3 @@ static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { majnemer wrote: > You could use `__attribute__((align_value(16)))` no? Technically yes but AFAICT there are no other users of this approach in the headers - is it something that we should be encouraging do you think? Craig - I think you wrote in a commit about dropping the unaligned intrinsics, is that how you'd do it? Repository: rL LLVM http://reviews.llvm.org/D20617 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
majnemer added a subscriber: majnemer. Comment at: lib/Headers/emmintrin.h:598 @@ -594,3 +597,3 @@ static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { You could use `__attribute__((align_value(16)))` no? Repository: rL LLVM http://reviews.llvm.org/D20617 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
craig.topper accepted this revision. craig.topper added a comment. This revision is now accepted and ready to land. Given that its documented as being aligned. I'm ok with it. LGTM Repository: rL LLVM http://reviews.llvm.org/D20617 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
RKSimon added a comment. In http://reviews.llvm.org/D20617#439200, @craig.topper wrote: > Can you double check gcc's xmmintrin.h again. I'm pretty sure _mm_store1_ps > is calling _mm_storeu_ps. Yes you're right - for gcc _mm_store1_pd is aligned (and there is a comment saying it must be), but _mm_store1_ps is unaligned. The intel intrinsics docs and msvc codegen both set both ps and pd versions to aligned store though. If you wish I can just do the pd fixes - we are alone in doing a extract + 2*movsd - the rest all use shufpd+movapd Suggestions for ps? Repository: rL LLVM http://reviews.llvm.org/D20617 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
Can you double check gcc's xmmintrin.h again. I'm pretty sure _mm_store1_ps is calling _mm_storeu_ps. On Wed, May 25, 2016 at 3:31 AM, Simon Pilgrim wrote: > RKSimon created this revision. > RKSimon added reviewers: craig.topper, spatel, andreadb. > RKSimon added a subscriber: cfe-commits. > RKSimon set the repository for this revision to rL LLVM. > > According to the gcc headers, intel intrinsics docs and msdn codegen the > _mm_store1_ps/_mm_store1_pd (and their _mm_store_ps1/_mm_store_pd1 > analogues) should require an aligned pointer - the clang headers are the > only implementation I can find that assume non-aligned stores (by storing > with _mm_storeu_ps/_mm_storeu_pd). > > This patch raises the alignment requirements to match the other > implementations by calling _mm_store_ps/_mm_store_pd instead. > > I've also added the missing _mm_store_pd1 intrinsic (which maps to > _mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps). > > As a followup I'll update the llvm fast-isel tests to match this codegen. > > Repository: > rL LLVM > > http://reviews.llvm.org/D20617 > > Files: > lib/Headers/emmintrin.h > lib/Headers/xmmintrin.h > test/CodeGen/sse-builtins.c > test/CodeGen/sse2-builtins.c > > -- ~Craig ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
RKSimon created this revision. RKSimon added reviewers: craig.topper, spatel, andreadb. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. According to the gcc headers, intel intrinsics docs and msdn codegen the _mm_store1_ps/_mm_store1_pd (and their _mm_store_ps1/_mm_store_pd1 analogues) should require an aligned pointer - the clang headers are the only implementation I can find that assume non-aligned stores (by storing with _mm_storeu_ps/_mm_storeu_pd). This patch raises the alignment requirements to match the other implementations by calling _mm_store_ps/_mm_store_pd instead. I've also added the missing _mm_store_pd1 intrinsic (which maps to _mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps). As a followup I'll update the llvm fast-isel tests to match this codegen. Repository: rL LLVM http://reviews.llvm.org/D20617 Files: lib/Headers/emmintrin.h lib/Headers/xmmintrin.h test/CodeGen/sse-builtins.c test/CodeGen/sse2-builtins.c Index: test/CodeGen/sse2-builtins.c === --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -1205,6 +1205,13 @@ _mm_store_pd(A, B); } +void test_mm_store_pd1(double* x, __m128d y) { + // CHECK-LABEL: test_mm_store_pd1 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 + _mm_store_pd1(x, y); +} + void test_mm_store_sd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -1220,9 +1227,8 @@ void test_mm_store1_pd(double* x, __m128d y) { // CHECK-LABEL: test_mm_store1_pd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 _mm_store1_pd(x, y); } Index: test/CodeGen/sse-builtins.c === --- test/CodeGen/sse-builtins.c +++ test/CodeGen/sse-builtins.c @@ -651,7 +651,7 @@ void test_mm_store_ps1(float* x, __m128 y) { // CHECK-LABEL: test_mm_store_ps1 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}}) + // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16 _mm_store_ps1(x, y); } @@ -665,7 +665,7 @@ void test_mm_store1_ps(float* x, __m128 y) { // CHECK-LABEL: test_mm_store1_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}}) + // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16 _mm_store1_ps(x, y); } Index: lib/Headers/xmmintrin.h === --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -1590,22 +1590,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store1_ps(float *__p, __m128 __a) +_mm_store_ps(float *__p, __m128 __a) { - __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); - _mm_storeu_ps(__p, __a); + *(__m128 *)__p = __a; } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps1(float *__p, __m128 __a) +_mm_store1_ps(float *__p, __m128 __a) { -return _mm_store1_ps(__p, __a); + __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); + _mm_store_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps(float *__p, __m128 __a) +_mm_store_ps1(float *__p, __m128 __a) { - *(__m128 *)__p = __a; + return _mm_store1_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS Index: lib/Headers/emmintrin.h === --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -582,19 +582,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ + *(__m128d *)__dp = __a; +} + +static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { - struct __mm_store1_pd_struct { -double __u[2]; - } __attribute__((__packed__, __may_alias__)); - ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; - ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; + __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); + _mm_store_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { - *(__m128d *)__dp = __a; + return _mm_store1_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS ___ cfe-commits