Author: ctopper Date: Wed May 30 11:27:07 2018 New Revision: 333572 URL: http://llvm.org/viewvc/llvm-project?rev=333572&view=rev Log: [X86] Simplify the implementation of _mm_sqrt_ss, _mm_rcp_ss, and _mm_rsqrt_ss.
We don't need to insert the result back into the original vector at the end; the builtin already passes the upper elements of its input through unchanged. This is different from _mm_sqrt_sd, which takes two arguments, so there we do still need the insert. Modified: cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/test/CodeGen/sse-builtins.c Modified: cfe/trunk/lib/Headers/xmmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=333572&r1=333571&r2=333572&view=diff ============================================================================== --- cfe/trunk/lib/Headers/xmmintrin.h (original) +++ cfe/trunk/lib/Headers/xmmintrin.h Wed May 30 11:27:07 2018 @@ -224,8 +224,7 @@ _mm_div_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) { - __m128 __c = __builtin_ia32_sqrtss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return (__m128)__builtin_ia32_sqrtss((__v4sf)__a); } /// Calculates the square roots of the values stored in a 128-bit vector @@ -260,8 +259,7 @@ _mm_sqrt_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a) { - __m128 __c = __builtin_ia32_rcpss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return (__m128)__builtin_ia32_rcpss((__v4sf)__a); } /// Calculates the approximate reciprocals of the values stored in a @@ -278,7 +276,7 @@ _mm_rcp_ss(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a) { - return __builtin_ia32_rcpps((__v4sf)__a); + return (__m128)__builtin_ia32_rcpps((__v4sf)__a); } /// Calculates the approximate reciprocal of the square root of the value @@ -297,8 +295,7 @@ _mm_rcp_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a) { - __m128 __c = __builtin_ia32_rsqrtss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return __builtin_ia32_rsqrtss((__v4sf)__a); } /// Calculates the approximate reciprocals of the square roots of the Modified: cfe/trunk/test/CodeGen/sse-builtins.c 
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=333572&r1=333571&r2=333572&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/sse-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse-builtins.c Wed May 30 11:27:07 2018 @@ -508,14 +508,6 @@ __m128 test_mm_rcp_ps(__m128 x) { __m128 test_mm_rcp_ss(__m128 x) { // CHECK-LABEL: test_mm_rcp_ss // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}}) - // CHECK: extractelement <4 x float> {{.*}}, i32 0 - // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 - // CHECK: extractelement <4 x float> {{.*}}, i32 1 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 - // CHECK: extractelement <4 x float> {{.*}}, i32 2 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 - // CHECK: extractelement <4 x float> {{.*}}, i32 3 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_rcp_ss(x); } @@ -528,14 +520,6 @@ __m128 test_mm_rsqrt_ps(__m128 x) { __m128 test_mm_rsqrt_ss(__m128 x) { // CHECK-LABEL: test_mm_rsqrt_ss // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}}) - // CHECK: extractelement <4 x float> {{.*}}, i32 0 - // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 - // CHECK: extractelement <4 x float> {{.*}}, i32 1 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 - // CHECK: extractelement <4 x float> {{.*}}, i32 2 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 - // CHECK: extractelement <4 x float> {{.*}}, i32 3 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_rsqrt_ss(x); } @@ -662,14 +646,6 @@ __m128 test_mm_sqrt_ps(__m128 x) { __m128 test_sqrt_ss(__m128 x) { // CHECK: define {{.*}} @test_sqrt_ss // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss - // CHECK: extractelement <4 x float> {{.*}}, i32 0 - // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 - 
// CHECK: extractelement <4 x float> {{.*}}, i32 1 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 - // CHECK: extractelement <4 x float> {{.*}}, i32 2 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 - // CHECK: extractelement <4 x float> {{.*}}, i32 3 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_sqrt_ss(x); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits