Re: [PATCH] D20523: [Clang][AVX512][BUILTIN] Add missing intrinsics for cast.
This revision was automatically updated to reflect the committed changes. Closed by commit rL270699: [Clang][AVX512][BUILTIN] Add missing intrinsics for cast (authored by mzuckerm). Changed prior to commit: http://reviews.llvm.org/D20523?vs=58095=58411#toc Repository: rL LLVM http://reviews.llvm.org/D20523 Files: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Index: cfe/trunk/test/CodeGen/avx512f-builtins.c === --- cfe/trunk/test/CodeGen/avx512f-builtins.c +++ cfe/trunk/test/CodeGen/avx512f-builtins.c @@ -328,13 +328,6 @@ return _mm512_set1_pd(d); } -__m512d test_mm512_castpd256_pd512(__m256d a) -{ - // CHECK-LABEL: @test_mm512_castpd256_pd512 - // CHECK: shufflevector <4 x double> {{.*}} - return _mm512_castpd256_pd512(a); -} - __mmask16 test_mm512_knot(__mmask16 a) { // CHECK-LABEL: @test_mm512_knot @@ -5925,18 +5918,66 @@ return _mm512_maskz_cvttpd_epu32(__U, __A); } -__m512d test_mm512_castpd128_pd512(__m128d __A) { - // CHECK-LABEL: @test_mm512_castpd128_pd512 - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> - return _mm512_castpd128_pd512(__A); +__m512 test_mm512_castpd_ps (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd_ps + // CHECK: bitcast <8 x double> %1 to <16 x float> + return _mm512_castpd_ps (__A); +} + +__m512d test_mm512_castps_pd (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps_pd + // CHECK: bitcast <16 x float> %1 to <8 x double> + return _mm512_castps_pd (__A); +} + +__m512i test_mm512_castpd_si512 (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd_si512 + // CHECK: bitcast <8 x double> %1 to <8 x i64> + return _mm512_castpd_si512 (__A); } __m512 test_mm512_castps128_ps512(__m128 __A) { // CHECK-LABEL: @test_mm512_castps128_ps512 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> return _mm512_castps128_ps512(__A); } +__m512d test_mm512_castpd128_pd512(__m128d __A) { + // CHECK-LABEL: @test_mm512_castpd128_pd512 + // CHECK: shufflevector <2 x 
double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + return _mm512_castpd128_pd512(__A); +} + +__m512d test_mm512_castpd256_pd512(__m256d a) +{ + // CHECK-LABEL: @test_mm512_castpd256_pd512 + // CHECK: shufflevector <4 x double> {{.*}} + return _mm512_castpd256_pd512(a); +} + +__m256d test_mm512_castpd512_pd256 (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd512_pd256 + // CHECK: shufflevector <8 x double> %1, <8 x double> %2, <4 x i32> + return _mm512_castpd512_pd256 (__A); +} + +__m256 test_mm512_castps512_ps256 (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps512_ps256 + // CHECK: shufflevector <16 x float> %1, <16 x float> %2, <8 x i32> + return _mm512_castps512_ps256 (__A); +} + +__m512i test_mm512_castps_si512 (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps_si512 + // CHECK: bitcast <16 x float> %1 to <8 x i64> + return _mm512_castps_si512 (__A); +} __m512i test_mm512_castsi128_si512(__m128i __A) { // CHECK-LABEL: @test_mm512_castsi128_si512 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> @@ -5949,6 +5990,26 @@ return _mm512_castsi256_si512(__A); } +__m512 test_mm512_castsi512_ps (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_ps + // CHECK: bitcast <8 x i64> %1 to <16 x float> + return _mm512_castsi512_ps (__A); +} + +__m512d test_mm512_castsi512_pd (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_pd + // CHECK: bitcast <8 x i64> %1 to <8 x double> + return _mm512_castsi512_pd (__A); +} + +__m128i test_mm512_castsi512_si128 (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_si128 + // CHECK: shufflevector <8 x i64> %1, <8 x i64> %2, <2 x i32> + return _mm512_castsi512_si128 (__A); +} __m128 test_mm_cvt_roundsd_ss(__m128 __A, __m128d __B) { // CHECK-LABEL: @test_mm_cvt_roundsd_ss Index: cfe/trunk/lib/Headers/avx512fintrin.h === --- cfe/trunk/lib/Headers/avx512fintrin.h +++ cfe/trunk/lib/Headers/avx512fintrin.h @@ -337,19 +337,54 @@ return __builtin_shufflevector(__a, __a, 0, 1); } +static __inline 
__m256d __DEFAULT_FN_ATTRS +_mm512_castpd512_pd256 (__m512d __A) +{ + return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); +} + static __inline __m128 __DEFAULT_FN_ATTRS _mm512_castps512_ps128(__m512 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +static __inline __m256 __DEFAULT_FN_ATTRS +_mm512_castps512_ps256 (__m512 __A) +{ + return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_castpd_ps (__m512d __A) +{ + return (__m512) (__A); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_castpd_si512 (__m512d __A) +{ + return (__m512i) (__A); +} static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_castpd128_pd512 (__m128d __A) {
Re: [PATCH] D20523: [Clang][AVX512][BUILTIN] Add missing intrinsics for cast.
AsafBadouh accepted this revision. AsafBadouh added a comment. This revision is now accepted and ready to land. LGTM http://reviews.llvm.org/D20523 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20523: [Clang][AVX512][BUILTIN] Add missing intrinsics for cast.
m_zuckerman created this revision. m_zuckerman added reviewers: AsafBadouh, igorb, delena. m_zuckerman added a subscriber: cfe-commits. http://reviews.llvm.org/D20523 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -328,13 +328,6 @@ return _mm512_set1_pd(d); } -__m512d test_mm512_castpd256_pd512(__m256d a) -{ - // CHECK-LABEL: @test_mm512_castpd256_pd512 - // CHECK: shufflevector <4 x double> {{.*}} - return _mm512_castpd256_pd512(a); -} - __mmask16 test_mm512_knot(__mmask16 a) { // CHECK-LABEL: @test_mm512_knot @@ -5925,18 +5918,66 @@ return _mm512_maskz_cvttpd_epu32(__U, __A); } -__m512d test_mm512_castpd128_pd512(__m128d __A) { - // CHECK-LABEL: @test_mm512_castpd128_pd512 - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> - return _mm512_castpd128_pd512(__A); +__m512 test_mm512_castpd_ps (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd_ps + // CHECK: bitcast <8 x double> %1 to <16 x float> + return _mm512_castpd_ps (__A); +} + +__m512d test_mm512_castps_pd (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps_pd + // CHECK: bitcast <16 x float> %1 to <8 x double> + return _mm512_castps_pd (__A); +} + +__m512i test_mm512_castpd_si512 (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd_si512 + // CHECK: bitcast <8 x double> %1 to <8 x i64> + return _mm512_castpd_si512 (__A); } __m512 test_mm512_castps128_ps512(__m128 __A) { // CHECK-LABEL: @test_mm512_castps128_ps512 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> return _mm512_castps128_ps512(__A); } +__m512d test_mm512_castpd128_pd512(__m128d __A) { + // CHECK-LABEL: @test_mm512_castpd128_pd512 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + return _mm512_castpd128_pd512(__A); +} + +__m512d test_mm512_castpd256_pd512(__m256d a) +{ + // CHECK-LABEL: @test_mm512_castpd256_pd512 + // 
CHECK: shufflevector <4 x double> {{.*}} + return _mm512_castpd256_pd512(a); +} + +__m256d test_mm512_castpd512_pd256 (__m512d __A) +{ + // CHECK-LABEL: @test_mm512_castpd512_pd256 + // CHECK: shufflevector <8 x double> %1, <8 x double> %2, <4 x i32> + return _mm512_castpd512_pd256 (__A); +} + +__m256 test_mm512_castps512_ps256 (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps512_ps256 + // CHECK: shufflevector <16 x float> %1, <16 x float> %2, <8 x i32> + return _mm512_castps512_ps256 (__A); +} + +__m512i test_mm512_castps_si512 (__m512 __A) +{ + // CHECK-LABEL: @test_mm512_castps_si512 + // CHECK: bitcast <16 x float> %1 to <8 x i64> + return _mm512_castps_si512 (__A); +} __m512i test_mm512_castsi128_si512(__m128i __A) { // CHECK-LABEL: @test_mm512_castsi128_si512 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> @@ -5949,6 +5990,26 @@ return _mm512_castsi256_si512(__A); } +__m512 test_mm512_castsi512_ps (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_ps + // CHECK: bitcast <8 x i64> %1 to <16 x float> + return _mm512_castsi512_ps (__A); +} + +__m512d test_mm512_castsi512_pd (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_pd + // CHECK: bitcast <8 x i64> %1 to <8 x double> + return _mm512_castsi512_pd (__A); +} + +__m128i test_mm512_castsi512_si128 (__m512i __A) +{ + // CHECK-LABEL: @test_mm512_castsi512_si128 + // CHECK: shufflevector <8 x i64> %1, <8 x i64> %2, <2 x i32> + return _mm512_castsi512_si128 (__A); +} __m128 test_mm_cvt_roundsd_ss(__m128 __A, __m128d __B) { // CHECK-LABEL: @test_mm_cvt_roundsd_ss Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -337,19 +337,54 @@ return __builtin_shufflevector(__a, __a, 0, 1); } +static __inline __m256d __DEFAULT_FN_ATTRS +_mm512_castpd512_pd256 (__m512d __A) +{ + return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); +} + static __inline __m128 __DEFAULT_FN_ATTRS _mm512_castps512_ps128(__m512 __a) { return 
__builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +static __inline __m256 __DEFAULT_FN_ATTRS +_mm512_castps512_ps256 (__m512 __A) +{ + return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); +} + +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_castpd_ps (__m512d __A) +{ + return (__m512) (__A); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_castpd_si512 (__m512d __A) +{ + return (__m512i) (__A); +} static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_castpd128_pd512 (__m128d __A) { return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); } +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_castps_pd (__m512 __A) +{ + return (__m512d) (__A); +} + +static __inline __m512i