r287114 - Remove duplicate condition (PR30648). NFCI.
Author: rksimon Date: Wed Nov 16 10:11:08 2016 New Revision: 287114 URL: http://llvm.org/viewvc/llvm-project?rev=287114=rev Log: Remove duplicate condition (PR30648). NFCI. We only need to check that the bitstream entry is a Record. Modified: cfe/trunk/lib/Serialization/ASTReader.cpp Modified: cfe/trunk/lib/Serialization/ASTReader.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=287114=287113=287114=diff == --- cfe/trunk/lib/Serialization/ASTReader.cpp (original) +++ cfe/trunk/lib/Serialization/ASTReader.cpp Wed Nov 16 10:11:08 2016 @@ -4189,8 +4189,7 @@ static ASTFileSignature readASTFileSigna ASTReader::RecordData Record; while (true) { llvm::BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); -if (Entry.Kind == llvm::BitstreamEntry::EndBlock || -Entry.Kind != llvm::BitstreamEntry::Record) +if (Entry.Kind != llvm::BitstreamEntry::Record) return 0; Record.clear(); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26686: [X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with generic IR
This revision was automatically updated to reflect the committed changes. Closed by commit rL287088: [X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with… (authored by RKSimon). Changed prior to commit: https://reviews.llvm.org/D26686?vs=78038=78146#toc Repository: rL LLVM https://reviews.llvm.org/D26686 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def === --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -961,8 +961,6 @@ TARGET_BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd512_mask, "V8dV8iV8dUc", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtudq2pd512_mask, "V8dV8iV8dUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "", "avx512f") @@ -1165,8 +1163,6 @@ TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd128_mask, "V2dV4iV2dUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd256_mask, "V4dV4iV4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl") 
TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl") @@ -1189,8 +1185,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtudq2pd128_mask, "V2dV4iV2dUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtudq2pd256_mask, "V4dV4iV4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtudq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "", "avx512vl") Index: cfe/trunk/test/CodeGen/avx512f-builtins.c === --- cfe/trunk/test/CodeGen/avx512f-builtins.c +++ cfe/trunk/test/CodeGen/avx512f-builtins.c @@ -6949,33 +6949,43 @@ return _mm512_maskz_cvtepu32_ps (__U,__A); } +__m512d test_mm512_cvtepi32_pd (__m256i __A) +{ + // CHECK-LABEL: @test_mm512_cvtepi32_pd + // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> + return _mm512_cvtepi32_pd (__A); +} + __m512d test_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_pd - // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.512 + // CHECK-LABEL: @test_mm512_mask_cvtepi32_pd + // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> + // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} return _mm512_mask_cvtepi32_pd (__W,__U,__A); } __m512d test_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_pd - // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.512 + // CHECK-LABEL: @test_mm512_maskz_cvtepi32_pd + // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> + // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} return _mm512_maskz_cvtepi32_pd (__U,__A); } __m512d test_mm512_cvtepi32lo_pd (__m512i __A) { // CHECK-LABEL: 
@test_mm512_cvtepi32lo_pd // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> - // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.512 + // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> return _mm512_cvtepi32lo_pd (__A); } __m512d test_mm512_mask_cvtepi32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_cvtepi32lo_pd // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> - // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.512 + // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> + // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} return _mm512_mask_cvtepi32lo_pd (__W, __U, __A); } @@ -7000,33 +7010,43 @@ return _mm512_maskz_cvtepi32_ps (__U,__A); } +__m512d test_mm512_cvtepu32_pd(__m256i __A) +{ + //
r287088 - [X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with generic IR
Author: rksimon Date: Wed Nov 16 03:27:40 2016 New Revision: 287088 URL: http://llvm.org/viewvc/llvm-project?rev=287088=rev Log: [X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with generic IR Both the (V)CVTDQ2PD (i32 to f64) and (V)CVTUDQ2PD (u32 to f64) conversion instructions are lossless and can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics without affecting final codegen. This patch removes the clang builtins and their use in the headers - a future patch will deal with removing the llvm intrinsics. This is an extension patch to D20528 which dealt with the equivalent sse/avx cases. Differential Revision: https://reviews.llvm.org/D26686 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=287088=287087=287088=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Nov 16 03:27:40 2016 @@ -961,8 +961,6 @@ TARGET_BUILTIN(__builtin_ia32_maxps512_m TARGET_BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd512_mask, "V8dV8iV8dUc", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtudq2pd512_mask, "V8dV8iV8dUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "", "avx512f") @@ -1165,8 +1163,6 @@ 
TARGET_BUILTIN(__builtin_ia32_compressst TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd128_mask, "V2dV4iV2dUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd256_mask, "V4dV4iV4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl") @@ -1189,8 +1185,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttps2dq1 TARGET_BUILTIN(__builtin_ia32_cvttps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtudq2pd128_mask, "V2dV4iV2dUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtudq2pd256_mask, "V4dV4iV4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtudq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "", "avx512vl") Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=287088=287087=287088=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Nov 16 03:27:40 2016 @@ -3740,26 +3740,23 @@ _mm512_maskz_cvtepu32_ps (__mmask16 __U, static __inline __m512d __DEFAULT_FN_ATTRS _mm512_cvtepi32_pd(__m256i __A) { - return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, -(__v8df) -_mm512_setzero_pd (), -(__mmask8) -1); + return (__m512d)__builtin_convertvector((__v8si)__A, 
__v8df); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) { - return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, -(__v8df) __W, -(__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, + (__v8df)_mm512_cvtepi32_pd(__A), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) { - return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, -(__v8df) _mm512_setzero_pd (), -(__mmask8) __U); + return
[PATCH] D26686: [X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with generic IR
RKSimon created this revision. RKSimon added reviewers: craig.topper, igorb, delena. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. Both the (V)CVTDQ2PD (i32 to f64) and (V)CVTDQ2PD (u32 to f64) conversion instructions are lossless and can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics without affecting final codegen. This patch removes the clang builtins and their use in the headers - a future patch will deal with removing the llvm intrinsics. This is an extension patch to https://reviews.llvm.org/D20528 which dealt with the equivalent sse/avx cases. Repository: rL LLVM https://reviews.llvm.org/D26686 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512fintrin.h lib/Headers/avx512vlintrin.h test/CodeGen/avx512f-builtins.c test/CodeGen/avx512vl-builtins.c Index: test/CodeGen/avx512vl-builtins.c === --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -1737,23 +1737,29 @@ } __m128d test_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_cvtepi32_pd - // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.128 - return _mm_mask_cvtepi32_pd(__W,__U,__A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> + // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + return _mm_mask_cvtepi32_pd(__W,__U,__A); } __m128d test_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_cvtepi32_pd - // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.128 - return _mm_maskz_cvtepi32_pd(__U,__A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> + // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + return _mm_maskz_cvtepi32_pd(__U,__A); } __m256d test_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) { // 
CHECK-LABEL: @test_mm256_mask_cvtepi32_pd - // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.256 - return _mm256_mask_cvtepi32_pd(__W,__U,__A); + // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> + // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + return _mm256_mask_cvtepi32_pd(__W,__U,__A); } __m256d test_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm256_maskz_cvtepi32_pd - // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.256 - return _mm256_maskz_cvtepi32_pd(__U,__A); + // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> + // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + return _mm256_maskz_cvtepi32_pd(__U,__A); } __m128 test_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_cvtepi32_ps @@ -2017,33 +2023,40 @@ } __m128d test_mm_cvtepu32_pd(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepu32_pd - // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128 - return _mm_cvtepu32_pd(__A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> + // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double> + return _mm_cvtepu32_pd(__A); } __m128d test_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_cvtepu32_pd - // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128 - return _mm_mask_cvtepu32_pd(__W,__U,__A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> + // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double> + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} + return _mm_mask_cvtepu32_pd(__W,__U,__A); } __m128d test_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_cvtepu32_pd - // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128 - return _mm_maskz_cvtepu32_pd(__U,__A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> + // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double> + // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x 
double> {{.*}} + return _mm_maskz_cvtepu32_pd(__U,__A); } __m256d test_mm256_cvtepu32_pd(__m128i __A) { // CHECK-LABEL: @test_mm256_cvtepu32_pd - // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.256 - return _mm256_cvtepu32_pd(__A); + // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> + return _mm256_cvtepu32_pd(__A); } __m256d test_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm256_mask_cvtepu32_pd - // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.256 - return _mm256_mask_cvtepu32_pd(__W,__U,__A); + // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> + // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} + return _mm256_mask_cvtepu32_pd(__W,__U,__A); } __m256d test_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL:
r286996 - Fixed spelling in comments. NFCI.
Author: rksimon Date: Tue Nov 15 12:28:07 2016 New Revision: 286996 URL: http://llvm.org/viewvc/llvm-project?rev=286996=rev Log: Fixed spelling in comments. NFCI. Modified: cfe/trunk/include/clang/AST/DeclObjC.h cfe/trunk/include/clang/Basic/TargetInfo.h cfe/trunk/lib/Sema/SemaExprCXX.cpp Modified: cfe/trunk/include/clang/AST/DeclObjC.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/DeclObjC.h?rev=286996=286995=286996=diff == --- cfe/trunk/include/clang/AST/DeclObjC.h (original) +++ cfe/trunk/include/clang/AST/DeclObjC.h Tue Nov 15 12:28:07 2016 @@ -394,7 +394,7 @@ public: /// createImplicitParams - Used to lazily create the self and cmd /// implict parameters. This must be called prior to using getSelfDecl() - /// or getCmdDecl(). The call is ignored if the implicit paramters + /// or getCmdDecl(). The call is ignored if the implicit parameters /// have already been created. void createImplicitParams(ASTContext , const ObjCInterfaceDecl *ID); Modified: cfe/trunk/include/clang/Basic/TargetInfo.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/TargetInfo.h?rev=286996=286995=286996=diff == --- cfe/trunk/include/clang/Basic/TargetInfo.h (original) +++ cfe/trunk/include/clang/Basic/TargetInfo.h Tue Nov 15 12:28:07 2016 @@ -989,7 +989,7 @@ public: return false; } - /// \brief Whether target allows to overalign ABI-specified prefered alignment + /// \brief Whether target allows to overalign ABI-specified preferred alignment virtual bool allowsLargerPreferedTypeAlignment() const { return true; } /// \brief Set supported OpenCL extensions and optional core features. 
Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=286996=286995=286996=diff == --- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original) +++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Tue Nov 15 12:28:07 2016 @@ -1150,7 +1150,7 @@ bool Sema::CheckCXXThisCapture(SourceLoc // In the loop below, respect the ByCopy flag only for the closure requesting // the capture (i.e. first iteration through the loop below). Ignore it for - // all enclosing closure's upto NumCapturingClosures (since they must be + // all enclosing closure's up to NumCapturingClosures (since they must be // implicitly capturing the *enclosing object* by reference (see loop // above)). assert((!ByCopy || ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20359: [LLVM][AVX512][Intrinsics] Convert AVX non-temporal store builtins to LLVM-native IR.
RKSimon added a comment. Close this? It appears to have been committed, including the auto upgrade tests requested by Craig. https://reviews.llvm.org/D20359 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r286595 - [X86] Merge (near) duplicate scalar non-temporal store code. NFCI.
Author: rksimon Date: Fri Nov 11 08:38:34 2016 New Revision: 286595 URL: http://llvm.org/viewvc/llvm-project?rev=286595=rev Log: [X86] Merge (near) duplicate scalar non-temporal store code. NFCI. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=286595=286594=286595=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Nov 11 08:38:34 2016 @@ -7514,36 +7514,26 @@ Value *CodeGenFunction::EmitX86BuiltinEx } case X86::BI__builtin_ia32_movnti: - case X86::BI__builtin_ia32_movnti64: { -llvm::MDNode *Node = llvm::MDNode::get( -getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); - -// Convert the type of the pointer to a pointer to the stored type. -Value *BC = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType()), - "cast"); -StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); -SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); - -// No alignment for scalar intrinsic store. -SI->setAlignment(1); -return SI; - } + case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: case X86::BI__builtin_ia32_movntss: { llvm::MDNode *Node = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); +Value *Ptr = Ops[0]; +Value *Src = Ops[1]; + // Extract the 0'th element of the source vector. -Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); +if (BuiltinID == X86::BI__builtin_ia32_movntsd || +BuiltinID == X86::BI__builtin_ia32_movntss) + Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); // Convert the type of the pointer to a pointer to the stored type. 
-Value *BC = Builder.CreateBitCast(Ops[0], -llvm::PointerType::getUnqual(Scl->getType()), - "cast"); +Value *BC = Builder.CreateBitCast( +Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); // Unaligned nontemporal store of the scalar value. -StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); +StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); SI->setAlignment(1); return SI; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] r286449 - Fix -Wdocumentation warning
Author: rksimon Date: Thu Nov 10 07:54:39 2016 New Revision: 286449 URL: http://llvm.org/viewvc/llvm-project?rev=286449=rev Log: Fix -Wdocumentation warning Modified: clang-tools-extra/trunk/clang-move/ClangMove.h Modified: clang-tools-extra/trunk/clang-move/ClangMove.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-move/ClangMove.h?rev=286449=286448=286449=diff == --- clang-tools-extra/trunk/clang-move/ClangMove.h (original) +++ clang-tools-extra/trunk/clang-move/ClangMove.h Thu Nov 10 07:54:39 2016 @@ -72,7 +72,7 @@ public: /// \param SearchPath The search path which was used to find the IncludeHeader /// in the file system. It can be a relative path or an absolute path. /// \param FileName The name of file where the IncludeHeader comes from. - /// \param IncludeRange The source range for the written file name in #include + /// \param IncludeFilenameRange The source range for the written file name in #include /// (i.e. "old.h" for #include "old.h") in old.cc. /// \param SM The SourceManager. void addIncludes(llvm::StringRef IncludeHeader, bool IsAngled, ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285390 - Fix MSVC "not all control paths return a value" warning
Author: rksimon Date: Fri Oct 28 05:09:35 2016 New Revision: 285390 URL: http://llvm.org/viewvc/llvm-project?rev=285390=rev Log: Fix MSVC "not all control paths return a value" warning Add unreachable after enum switch statement Modified: cfe/trunk/lib/Driver/Action.cpp Modified: cfe/trunk/lib/Driver/Action.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=285390=285389=285390=diff == --- cfe/trunk/lib/Driver/Action.cpp (original) +++ cfe/trunk/lib/Driver/Action.cpp Fri Oct 28 05:09:35 2016 @@ -146,6 +146,8 @@ llvm::StringRef Action::GetOffloadKindNa // TODO: Add other programming models here. } + + llvm_unreachable("invalid offload kind"); } void InputAction::anchor() {} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: r285281 - Fix MSVC warning about missing 'this' from lambda default capture mode
> On 27 Oct 2016, at 13:25, Erik Verbruggen <erik.verbrug...@me.com> wrote: > > Eh? Preprocessor::IsFileLexer(const IncludeStackInfo &) is static and doesn't > need 'this'... Apparently MSVC mistakingly confuses that method with the > non-static Preprocessor::IsFileLexer() method? You’re not kidding, its incredibly annoying. I can add a comment explaining why its there if you wish? Simon >> On 27 Oct 2016, at 12:51, Simon Pilgrim via cfe-commits >> <cfe-commits@lists.llvm.org> wrote: >> >> Author: rksimon >> Date: Thu Oct 27 05:51:29 2016 >> New Revision: 285281 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=285281=rev >> Log: >> Fix MSVC warning about missing 'this' from lambda default capture mode >> >> Modified: >> cfe/trunk/lib/Lex/PPLexerChange.cpp >> >> Modified: cfe/trunk/lib/Lex/PPLexerChange.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPLexerChange.cpp?rev=285281=285280=285281=diff >> == >> --- cfe/trunk/lib/Lex/PPLexerChange.cpp (original) >> +++ cfe/trunk/lib/Lex/PPLexerChange.cpp Thu Oct 27 05:51:29 2016 >> @@ -41,7 +41,7 @@ bool Preprocessor::isInPrimaryFile() con >> assert(IsFileLexer(IncludeMacroStack[0]) && >> "Top level include stack isn't our primary lexer?"); >> return std::none_of(IncludeMacroStack.begin() + 1, IncludeMacroStack.end(), >> - [](const IncludeStackInfo ) -> bool { >> + [this](const IncludeStackInfo ) -> bool { >>return IsFileLexer(ISI); >> }); >> } >> >> >> ___ >> cfe-commits mailing list >> cfe-commits@lists.llvm.org >> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits > ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285281 - Fix MSVC warning about missing 'this' from lambda default capture mode
Author: rksimon Date: Thu Oct 27 05:51:29 2016 New Revision: 285281 URL: http://llvm.org/viewvc/llvm-project?rev=285281=rev Log: Fix MSVC warning about missing 'this' from lambda default capture mode Modified: cfe/trunk/lib/Lex/PPLexerChange.cpp Modified: cfe/trunk/lib/Lex/PPLexerChange.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPLexerChange.cpp?rev=285281=285280=285281=diff == --- cfe/trunk/lib/Lex/PPLexerChange.cpp (original) +++ cfe/trunk/lib/Lex/PPLexerChange.cpp Thu Oct 27 05:51:29 2016 @@ -41,7 +41,7 @@ bool Preprocessor::isInPrimaryFile() con assert(IsFileLexer(IncludeMacroStack[0]) && "Top level include stack isn't our primary lexer?"); return std::none_of(IncludeMacroStack.begin() + 1, IncludeMacroStack.end(), - [](const IncludeStackInfo ) -> bool { + [this](const IncludeStackInfo ) -> bool { return IsFileLexer(ISI); }); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285067 - Fix MSVC unused variable warning.
Author: rksimon Date: Tue Oct 25 07:59:15 2016 New Revision: 285067 URL: http://llvm.org/viewvc/llvm-project?rev=285067=rev Log: Fix MSVC unused variable warning. LLVM_ATTRIBUTE_UNUSED doesn't work for non-gcc style compilers. Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp?rev=285067=285066=285067=diff == --- cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (original) +++ cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp Tue Oct 25 07:59:15 2016 @@ -464,10 +464,11 @@ void StdLibraryFunctionsChecker::initFun QualType SSizeTy = ACtx.getIntTypeForBitwidth(ACtx.getTypeSize(SizeTy), true); // Don't worry about truncation here, it'd be cast back to SIZE_MAX when used. - LLVM_ATTRIBUTE_UNUSED int64_t SizeMax = + int64_t SizeMax = BVF.getMaxValue(SizeTy).getLimitedValue(); int64_t SSizeMax = BVF.getMaxValue(SSizeTy).getLimitedValue(); + (void)SizeMax; // We are finally ready to define specifications for all supported functions. // ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] r284476 - Fix signed/unsigned comparison warnings
Author: rksimon Date: Tue Oct 18 08:15:31 2016 New Revision: 284476 URL: http://llvm.org/viewvc/llvm-project?rev=284476=rev Log: Fix signed/unsigned comparison warnings Modified: clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp Modified: clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp?rev=284476=284475=284476=diff == --- clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp (original) +++ clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp Tue Oct 18 08:15:31 2016 @@ -51,7 +51,7 @@ private: }; template -std::string runChecker(StringRef Code, int ExpectedWarningCount) { +std::string runChecker(StringRef Code, unsigned ExpectedWarningCount) { std::mapAdditionalFileContents = {{"foo.h", "namespace foo {\n" "namespace bar {\n" Modified: clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp?rev=284476=284475=284476=diff == --- clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp (original) +++ clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp Tue Oct 18 08:15:31 2016 @@ -53,7 +53,7 @@ private: }; template -std::string runChecker(StringRef Code, int ExpectedWarningCount) { +std::string runChecker(StringRef Code, unsigned ExpectedWarningCount) { std::map AdditionalFileContents = {{"foo.h", "namespace foo {\n" "namespace bar {\n" ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r283549 - Wdocumentation fix
Author: rksimon Date: Fri Oct 7 08:25:41 2016 New Revision: 283549 URL: http://llvm.org/viewvc/llvm-project?rev=283549=rev Log: Wdocumentation fix Modified: cfe/trunk/include/clang/Sema/Sema.h Modified: cfe/trunk/include/clang/Sema/Sema.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=283549=283548=283549=diff == --- cfe/trunk/include/clang/Sema/Sema.h (original) +++ cfe/trunk/include/clang/Sema/Sema.h Fri Oct 7 08:25:41 2016 @@ -8837,16 +8837,16 @@ public: /// Check assignment constraints for an assignment of RHS to LHSType. /// - /// \brief LHSType The destination type for the assignment. - /// \brief RHS The source expression for the assignment. - /// \brief Diagnose If \c true, diagnostics may be produced when checking + /// \param LHSType The destination type for the assignment. + /// \param RHS The source expression for the assignment. + /// \param Diagnose If \c true, diagnostics may be produced when checking ///for assignability. If a diagnostic is produced, \p RHS will be ///set to ExprError(). Note that this function may still return ///without producing a diagnostic, even for an invalid assignment. - /// \brief DiagnoseCFAudited If \c true, the target is a function parameter + /// \param DiagnoseCFAudited If \c true, the target is a function parameter ///in an audited Core Foundation API and does not need to be checked ///for ARC retain issues. - /// \brief ConvertRHS If \c true, \p RHS will be updated to model the + /// \param ConvertRHS If \c true, \p RHS will be updated to model the ///conversions necessary to perform the assignment. If \c false, ///\p Diagnose must also be \c false. AssignConvertType CheckSingleAssignmentConstraints( ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r283106 - Wdocumentation fix
Author: rksimon Date: Mon Oct 3 07:37:08 2016 New Revision: 283106 URL: http://llvm.org/viewvc/llvm-project?rev=283106=rev Log: Wdocumentation fix Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=283106=283105=283106=diff == --- cfe/trunk/lib/Analysis/CloneDetection.cpp (original) +++ cfe/trunk/lib/Analysis/CloneDetection.cpp Mon Oct 3 07:37:08 2016 @@ -110,7 +110,7 @@ class VariablePattern { /// \brief Adds a new variable referenced to this pattern. /// \param VarDecl The declaration of the variable that is referenced. - /// \param Range The SourceRange where this variable is referenced. + /// \param Mention The statement in the code where the variable was referenced. void addVariableOccurence(const VarDecl *VarDecl, const Stmt *Mention) { // First check if we already reference this variable for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r282858 - Strip trailing whitespace (NFCI)
Author: rksimon Date: Fri Sep 30 09:25:09 2016 New Revision: 282858 URL: http://llvm.org/viewvc/llvm-project?rev=282858=rev Log: Strip trailing whitespace (NFCI) Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=282858=282857=282858=diff == --- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original) +++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Fri Sep 30 09:25:09 2016 @@ -292,7 +292,7 @@ ParsedType Sema::getDestructorName(Sourc if (isDependent) { // We didn't find our type, but that's okay: it's dependent // anyway. - + // FIXME: What if we have no nested-name-specifier? QualType T = CheckTypenameType(ETK_None, SourceLocation(), SS.getWithLocInContext(Context), @@ -326,14 +326,14 @@ ParsedType Sema::getDestructorName(Sourc ParsedType Sema::getDestructorType(const DeclSpec& DS, ParsedType ObjectType) { if (DS.getTypeSpecType() == DeclSpec::TST_error || !ObjectType) return nullptr; -assert(DS.getTypeSpecType() == DeclSpec::TST_decltype +assert(DS.getTypeSpecType() == DeclSpec::TST_decltype && "only get destructor types from declspecs"); QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc()); QualType SearchType = GetTypeFromParser(ObjectType); if (SearchType->isDependentType() || Context.hasSameUnqualifiedType(SearchType, T)) { return ParsedType::make(T); } - + Diag(DS.getTypeSpecTypeLoc(), diag::err_destructor_expr_type_mismatch) << T << SearchType; return nullptr; @@ -662,7 +662,7 @@ Sema::ActOnCXXThrow(Scope *S, SourceLoca IsThrownVarInScope = true; break; } - + if (S->getFlags() & (Scope::FnScope | Scope::ClassScope | Scope::BlockScope | Scope::FunctionPrototypeScope | Scope::ObjCMethodScope | @@ -672,11 +672,11 @@ Sema::ActOnCXXThrow(Scope *S, SourceLoca } } } - + return BuildCXXThrow(OpLoc, Ex, IsThrownVarInScope); } -ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr *Ex, +ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr 
*Ex, bool IsThrownVarInScope) { // Don't report an error if 'throw' is used in system headers. if (!getLangOpts().CXXExceptions && @@ -907,10 +907,10 @@ static QualType adjustCVQualifiersForCXX I-- && isa(FunctionScopes[I]); CurDC = getLambdaAwareParentOfDeclContext(CurDC)) { CurLSI = cast(FunctionScopes[I]); - -if (!CurLSI->isCXXThisCaptured()) + +if (!CurLSI->isCXXThisCaptured()) continue; - + auto C = CurLSI->getCXXThisCapture(); if (C.isCopyCapture()) { @@ -926,7 +926,7 @@ static QualType adjustCVQualifiersForCXX assert(CurLSI); assert(isGenericLambdaCallOperatorSpecialization(CurLSI->CallOperator)); assert(CurDC == getLambdaAwareParentOfDeclContext(CurLSI->CallOperator)); - + auto IsThisCaptured = [](CXXRecordDecl *Closure, bool , bool ) { IsConst = false; @@ -996,10 +996,10 @@ QualType Sema::getCurrentThisType() { return ThisTy; } -Sema::CXXThisScopeRAII::CXXThisScopeRAII(Sema , +Sema::CXXThisScopeRAII::CXXThisScopeRAII(Sema , Decl *ContextDecl, unsigned CXXThisTypeQuals, - bool Enabled) + bool Enabled) : S(S), OldCXXThisTypeOverride(S.CXXThisTypeOverride), Enabled(false) { if (!Enabled || !ContextDecl) @@ -1010,13 +1010,13 @@ Sema::CXXThisScopeRAII::CXXThisScopeRAII Record = Template->getTemplatedDecl(); else Record = cast(ContextDecl); - + // We care only for CVR qualifiers here, so cut everything else. CXXThisTypeQuals &= Qualifiers::FastMask; S.CXXThisTypeOverride = S.Context.getPointerType( S.Context.getRecordType(Record).withCVRQualifiers(CXXThisTypeQuals)); - + this->Enabled = true; } @@ -1030,7 +1030,7 @@ Sema::CXXThisScopeRAII::~CXXThisScopeRAI static Expr *captureThis(Sema , ASTContext , RecordDecl *RD, QualType ThisTy, SourceLocation Loc, const bool ByCopy) { - + QualType AdjustedThisTy = ThisTy; // The type of the corresponding data member (not a 'this' pointer if 'by // copy'). 
@@ -1043,7 +1043,7 @@ static Expr *captureThis(Sema , ASTCon CaptureThisFieldTy.removeLocalCVRQualifiers(Qualifiers::CVRMask); AdjustedThisTy = Context.getPointerType(CaptureThisFieldTy); } - + FieldDecl *Field = FieldDecl::Create( Context, RD, Loc, Loc, nullptr, CaptureThisFieldTy, Context.getTrivialTypeSourceInfo(CaptureThisFieldTy, Loc),
r282857 - Fix int <= bool comparison warning on MSVC
Author: rksimon Date: Fri Sep 30 09:18:06 2016 New Revision: 282857 URL: http://llvm.org/viewvc/llvm-project?rev=282857=rev Log: Fix int <= bool comparison warning on MSVC Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=282857=282856=282857=diff == --- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original) +++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Fri Sep 30 09:18:06 2016 @@ -2366,11 +2366,14 @@ void Sema::DeclareGlobalNewDelete() { bool HasSizedVariant = getLangOpts().SizedDeallocation && (Kind == OO_Delete || Kind == OO_Array_Delete); bool HasAlignedVariant = getLangOpts().CPlusPlus1z; -for (int Sized = 0; Sized <= HasSizedVariant; ++Sized) { + +int NumSizeVariants = (HasSizedVariant ? 2 : 1); +int NumAlignVariants = (HasAlignedVariant ? 2 : 1); +for (int Sized = 0; Sized < NumSizeVariants; ++Sized) { if (Sized) Params.push_back(SizeT); - for (int Aligned = 0; Aligned <= HasAlignedVariant; ++Aligned) { + for (int Aligned = 0; Aligned < NumAlignVariants; ++Aligned) { if (Aligned) Params.push_back(Context.getTypeDeclType(getStdAlignValT())); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21021: [Clang][AVX512][BuiltIn] Adding intrinsics move_{sd|ss} to clang
RKSimon added a subscriber: RKSimon. Comment at: lib/Headers/avx512fintrin.h:9124 @@ +9123,3 @@ +{ + return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B, + (__v4sf) __W, delena wrote: > please try the following: > if (__U) > return __builtin_shuffle(A, B, (0, 5, 6, 7)); // may be you need to swap A > and B > return W; > > I know that the immediate code will be less optimal, but we can optimize it > later. Any update on this? I currently have a patch (D24653) looking at removing the movss/movsd mask intrinsics as we should be able to do this with purely generic shuffles. I can help with the optimization if necessary. https://reviews.llvm.org/D21021 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r280921 - Moved unreachable to appease msvc, gcc and clang
Author: rksimon Date: Thu Sep 8 06:03:41 2016 New Revision: 280921 URL: http://llvm.org/viewvc/llvm-project?rev=280921=rev Log: Moved unreachable to appease msvc, gcc and clang Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTables.cpp?rev=280921=280920=280921=diff == --- cfe/trunk/lib/CodeGen/CGVTables.cpp (original) +++ cfe/trunk/lib/CodeGen/CGVTables.cpp Thu Sep 8 06:03:41 2016 @@ -529,9 +529,6 @@ llvm::Constant *CodeGenVTables::CreateVT }; switch (Component.getKind()) { - default: -llvm_unreachable("Unexpected vtable component kind"); - case VTableComponent::CK_VCallOffset: return OffsetConstant(Component.getVCallOffset()); @@ -619,6 +616,8 @@ llvm::Constant *CodeGenVTables::CreateVT case VTableComponent::CK_UnusedFunctionPointer: return llvm::ConstantExpr::getNullValue(CGM.Int8PtrTy); } + + llvm_unreachable("Unexpected vtable component kind"); } llvm::Constant * ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r280917 - Fixed a 'not all control paths return a value' warning on MSVC builds
Author: rksimon Date: Thu Sep 8 04:59:58 2016 New Revision: 280917 URL: http://llvm.org/viewvc/llvm-project?rev=280917=rev Log: Fixed a 'not all control paths return a value' warning on MSVC builds Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTables.cpp?rev=280917=280916=280917=diff == --- cfe/trunk/lib/CodeGen/CGVTables.cpp (original) +++ cfe/trunk/lib/CodeGen/CGVTables.cpp Thu Sep 8 04:59:58 2016 @@ -29,7 +29,7 @@ using namespace CodeGen; CodeGenVTables::CodeGenVTables(CodeGenModule ) : CGM(CGM), VTContext(CGM.getContext().getVTableContext()) {} -llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD, +llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD, const ThunkInfo ) { const CXXMethodDecl *MD = cast(GD.getDecl()); @@ -93,7 +93,7 @@ static RValue PerformReturnAdjustment(Co AdjustNull = CGF.createBasicBlock("adjust.null"); AdjustNotNull = CGF.createBasicBlock("adjust.notnull"); AdjustEnd = CGF.createBasicBlock("adjust.end"); - + llvm::Value *IsNull = CGF.Builder.CreateIsNull(ReturnValue); CGF.Builder.CreateCondBr(IsNull, AdjustNull, AdjustNotNull); CGF.EmitBlock(AdjustNotNull); @@ -110,14 +110,14 @@ static RValue PerformReturnAdjustment(Co CGF.EmitBlock(AdjustNull); CGF.Builder.CreateBr(AdjustEnd); CGF.EmitBlock(AdjustEnd); - + llvm::PHINode *PHI = CGF.Builder.CreatePHI(ReturnValue->getType(), 2); PHI->addIncoming(ReturnValue, AdjustNotNull); -PHI->addIncoming(llvm::Constant::getNullValue(ReturnValue->getType()), +PHI->addIncoming(llvm::Constant::getNullValue(ReturnValue->getType()), AdjustNull); ReturnValue = PHI; } - + return RValue::get(ReturnValue); } @@ -314,7 +314,7 @@ void CodeGenFunction::EmitCallAndReturnF CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && !hasScalarEvaluationKind(CurFnInfo->getReturnType())) Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); - + // Now emit our call. 
llvm::Instruction *CallOrInvoke; RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, MD, ); @@ -433,14 +433,14 @@ void CodeGenVTables::emitThunk(GlobalDec // Remove the name from the old thunk function and get a new thunk. OldThunkFn->setName(StringRef()); Entry = cast(CGM.GetAddrOfThunk(GD, Thunk)); - + // If needed, replace the old thunk with a bitcast. if (!OldThunkFn->use_empty()) { llvm::Constant *NewPtrForOldDecl = llvm::ConstantExpr::getBitCast(Entry, OldThunkFn->getType()); OldThunkFn->replaceAllUsesWith(NewPtrForOldDecl); } - + // Remove the old thunk. OldThunkFn->eraseFromParent(); } @@ -500,7 +500,7 @@ void CodeGenVTables::maybeEmitThunkForVT void CodeGenVTables::EmitThunks(GlobalDecl GD) { - const CXXMethodDecl *MD = + const CXXMethodDecl *MD = cast(GD.getDecl())->getCanonicalDecl(); // We don't need to generate thunks for the base destructor. @@ -529,6 +529,9 @@ llvm::Constant *CodeGenVTables::CreateVT }; switch (Component.getKind()) { + default: +llvm_unreachable("Unexpected vtable component kind"); + case VTableComponent::CK_VCallOffset: return OffsetConstant(Component.getVCallOffset()); @@ -636,9 +639,9 @@ CodeGenVTables::CreateVTableInitializer( } llvm::GlobalVariable * -CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, - const BaseSubobject , - bool BaseIsVirtual, +CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, + const BaseSubobject , + bool BaseIsVirtual, llvm::GlobalVariable::LinkageTypes Linkage, VTableAddressPointsMapTy& AddressPoints) { if (CGDebugInfo *DI = CGM.getModuleDebugInfo()) @@ -671,7 +674,7 @@ CodeGenVTables::GenerateConstructionVTab Linkage = llvm::GlobalVariable::InternalLinkage; // Create the variable that will hold the construction vtable. 
- llvm::GlobalVariable *VTable = + llvm::GlobalVariable *VTable = CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType, Linkage); CGM.setGlobalVisibility(VTable, RD); @@ -684,7 +687,7 @@ CodeGenVTables::GenerateConstructionVTab // Create and set the initializer. llvm::Constant *Init = CreateVTableInitializer(*VTLayout, RTTI); VTable->setInitializer(Init); - + CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); return VTable; @@ -699,7 +702,7 @@ static bool shouldEmitAvailableExternall /// Compute the required linkage of the vtable for the given class. /// /// Note that we
r279382 - Wdocumentation fix
Author: rksimon Date: Sat Aug 20 15:21:27 2016 New Revision: 279382 URL: http://llvm.org/viewvc/llvm-project?rev=279382=rev Log: Wdocumentation fix Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=279382=279381=279382=diff == --- cfe/trunk/lib/Analysis/CloneDetection.cpp (original) +++ cfe/trunk/lib/Analysis/CloneDetection.cpp Sat Aug 20 15:21:27 2016 @@ -295,7 +295,7 @@ public: /// \brief Collects data of the given Stmt. /// \param S The given statement. /// \param Context The ASTContext of S. - /// \param D The data sink to which all data is forwarded. + /// \param DataConsumer The data sink to which all data is forwarded. StmtDataCollector(const Stmt *S, ASTContext , T ) : Context(Context), DataConsumer(DataConsumer) { this->Visit(S); @@ -695,7 +695,7 @@ static bool areSequencesClones(const Stm /// \param Group A group of presumed clones. The clones are allowed to have a /// different variable pattern and may not be actual clones of each /// other. -/// \param CheckVariablePatterns If true, every clone in a group that was added +/// \param CheckVariablePattern If true, every clone in a group that was added /// to the output follows the same variable pattern as the other /// clones in its group. static void createCloneGroups(std::vector , ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r278503 - Fix Wdocumentation unknown parameter warning
Author: rksimon Date: Fri Aug 12 06:43:57 2016 New Revision: 278503 URL: http://llvm.org/viewvc/llvm-project?rev=278503=rev Log: Fix Wdocumentation unknown parameter warning Modified: cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp Modified: cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp?rev=278503=278502=278503=diff == --- cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp (original) +++ cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp Fri Aug 12 06:43:57 2016 @@ -863,12 +863,12 @@ static bool hasInconsistentOrSupersetQua if (ParamQs == ArgQs) return false; - + // Mismatched (but not missing) Objective-C GC attributes. - if (ParamQs.getObjCGCAttr() != ArgQs.getObjCGCAttr() && + if (ParamQs.getObjCGCAttr() != ArgQs.getObjCGCAttr() && ParamQs.hasObjCGCAttr()) return true; - + // Mismatched (but not missing) address spaces. if (ParamQs.getAddressSpace() != ArgQs.getAddressSpace() && ParamQs.hasAddressSpace()) @@ -878,7 +878,7 @@ static bool hasInconsistentOrSupersetQua if (ParamQs.getObjCLifetime() != ArgQs.getObjCLifetime() && ParamQs.hasObjCLifetime()) return true; - + // CVR qualifier superset. return (ParamQs.getCVRQualifiers() != ArgQs.getCVRQualifiers()) && ((ParamQs.getCVRQualifiers() | ArgQs.getCVRQualifiers()) @@ -1060,7 +1060,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema // Just skip any attempts to deduce from a placeholder type. if (Arg->isPlaceholderType()) return Sema::TDK_Success; - + unsigned Index = TemplateTypeParm->getIndex(); bool RecanonicalizeArg = false; @@ -1100,7 +1100,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema DeducedQs.removeAddressSpace(); if (ParamQs.hasObjCLifetime()) DeducedQs.removeObjCLifetime(); - + // Objective-C ARC: // If template deduction would produce a lifetime qualifier on a type // that is not a lifetime type, template argument deduction fails. 
@@ -1109,9 +1109,9 @@ DeduceTemplateArgumentsByTypeMatch(Sema Info.Param = cast(TemplateParams->getParam(Index)); Info.FirstArg = TemplateArgument(Param); Info.SecondArg = TemplateArgument(Arg); - return Sema::TDK_Underqualified; + return Sema::TDK_Underqualified; } - + // Objective-C ARC: // If template deduction would produce an argument type with lifetime type // but no lifetime qualifier, the __strong lifetime qualifier is inferred. @@ -1119,10 +1119,10 @@ DeduceTemplateArgumentsByTypeMatch(Sema DeducedType->isObjCLifetimeType() && !DeducedQs.hasObjCLifetime()) DeducedQs.setObjCLifetime(Qualifiers::OCL_Strong); - + DeducedType = S.Context.getQualifiedType(DeducedType.getUnqualifiedType(), DeducedQs); - + if (RecanonicalizeArg) DeducedType = S.Context.getCanonicalType(DeducedType); @@ -1163,7 +1163,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema if (Param.getCVRQualifiers() != Arg.getCVRQualifiers()) return Sema::TDK_NonDeducedMismatch; } - + // If the parameter type is not dependent, there is nothing to deduce. if (!Param->isDependentType()) { if (!(TDF & TDF_SkipNonDependent)) { @@ -1193,7 +1193,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema case Type::Class: llvm_unreachable("deducing non-canonical type: " #Class); #define TYPE(Class, Base) #include "clang/AST/TypeNodes.def" - + case Type::TemplateTypeParm: case Type::SubstTemplateTypeParmPack: llvm_unreachable("Type nodes handled above"); @@ -1211,20 +1211,20 @@ DeduceTemplateArgumentsByTypeMatch(Sema case Type::ObjCObjectPointer: { if (TDF & TDF_SkipNonDependent) return Sema::TDK_Success; - + if (TDF & TDF_IgnoreQualifiers) { Param = Param.getUnqualifiedType(); Arg = Arg.getUnqualifiedType(); } - + return Param == Arg? 
Sema::TDK_Success : Sema::TDK_NonDeducedMismatch; } - -// _Complex T [placeholder extension] + +// _Complex T [placeholder extension] case Type::Complex: if (const ComplexType *ComplexArg = Arg->getAs()) -return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams, - cast(Param)->getElementType(), +return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams, +cast(Param)->getElementType(), ComplexArg->getElementType(), Info, Deduced, TDF); @@ -1549,7 +1549,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
r278208 - [X86][AVX] Ensure we only match against 1-byte alignment
Author: rksimon Date: Wed Aug 10 04:59:49 2016 New Revision: 278208 URL: http://llvm.org/viewvc/llvm-project?rev=278208=rev Log: [X86][AVX] Ensure we only match against 1-byte alignment Modified: cfe/trunk/test/CodeGen/avx-builtins.c Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=278208=278207=278208=diff == --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Wed Aug 10 04:59:49 2016 @@ -84,14 +84,14 @@ __m256 test_mm256_blendv_ps(__m256 V1, _ __m256d test_mm256_broadcast_pd(__m128d* A) { // CHECK-LABEL: test_mm256_broadcast_pd - // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1 + // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}} // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> return _mm256_broadcast_pd(A); } __m256 test_mm256_broadcast_ps(__m128* A) { // CHECK-LABEL: test_mm256_broadcast_ps - // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1 + // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}} // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> return _mm256_broadcast_ps(A); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] r278111 - Fix Wdocumentation unknown parameter warning
Author: rksimon Date: Tue Aug 9 05:02:11 2016 New Revision: 278111 URL: http://llvm.org/viewvc/llvm-project?rev=278111=rev Log: Fix Wdocumentation unknown parameter warning Modified: clang-tools-extra/trunk/include-fixer/IncludeFixer.h Modified: clang-tools-extra/trunk/include-fixer/IncludeFixer.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/include-fixer/IncludeFixer.h?rev=278111=278110=278111=diff == --- clang-tools-extra/trunk/include-fixer/IncludeFixer.h (original) +++ clang-tools-extra/trunk/include-fixer/IncludeFixer.h Tue Aug 9 05:02:11 2016 @@ -30,7 +30,7 @@ namespace include_fixer { class IncludeFixerActionFactory : public clang::tooling::ToolAction { public: /// \param SymbolIndexMgr A source for matching symbols to header files. - /// \param Context A context for the symbol being queried. + /// \param Contexts The contexts for the symbols being queried. /// \param StyleName Fallback style for reformatting. /// \param MinimizeIncludePaths whether inserted include paths are optimized. IncludeFixerActionFactory(SymbolIndexManager , ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r276889 - Fix unnecessary default switch warning
Author: rksimon Date: Wed Jul 27 11:41:56 2016 New Revision: 276889 URL: http://llvm.org/viewvc/llvm-project?rev=276889=rev Log: Fix unnecessary default switch warning Modified: cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp Modified: cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp?rev=276889=276888=276889=diff == --- cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp (original) +++ cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp Wed Jul 27 11:41:56 2016 @@ -445,8 +445,8 @@ bool PrintingCodeCompleteConsumer::isRes case CodeCompletionResult::RK_Pattern: { return !StringRef(Result.Pattern->getAsString()).startswith(Filter); } - default: llvm_unreachable("Unknown code completion result Kind."); } + llvm_unreachable("Unknown code completion result Kind."); } void ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r276417 - [X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 with generic IR
Author: rksimon Date: Fri Jul 22 08:58:56 2016 New Revision: 276417 URL: http://llvm.org/viewvc/llvm-project?rev=276417=rev Log: [X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 with generic IR As discussed on D22460, I've updated the vbroadcastf128 pd256/ps256 builtins to map directly to generic IR - load+splat a 128-bit vector to both lanes of a 256-bit vector. Fix for PR28657. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=276417=276416=276417=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jul 22 08:58:56 2016 @@ -6619,6 +6619,26 @@ static Value *EmitX86MaskedLoad(CodeGenF return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); } +static Value *EmitX86SubVectorBroadcast(CodeGenFunction , +SmallVectorImpl , +llvm::Type *DstTy, +unsigned SrcSizeInBits, +unsigned Align) { + // Load the subvector. + Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); + + // Create broadcast mask. 
+ unsigned NumDstElts = DstTy->getVectorNumElements(); + unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); + + SmallVectorMask; + for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) +for (unsigned j = 0; j != NumSrcElts; ++j) + Mask.push_back(j); + + return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); +} + static Value *EmitX86Select(CodeGenFunction , Value *Mask, Value *Op0, Value *Op1) { @@ -6995,6 +7015,13 @@ Value *CodeGenFunction::EmitX86BuiltinEx getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); return EmitX86MaskedLoad(*this, Ops, Align); } + + case X86::BI__builtin_ia32_vbroadcastf128_pd256: + case X86::BI__builtin_ia32_vbroadcastf128_ps256: { +llvm::Type *DstTy = ConvertType(E->getType()); +return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 16); + } + case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=276417=276416=276417=diff == --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Fri Jul 22 08:58:56 2016 @@ -84,13 +84,15 @@ __m256 test_mm256_blendv_ps(__m256 V1, _ __m256d test_mm256_broadcast_pd(__m128d* A) { // CHECK-LABEL: test_mm256_broadcast_pd - // CHECK: call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %{{.*}}) + // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> return _mm256_broadcast_pd(A); } __m256 test_mm256_broadcast_ps(__m128* A) { // CHECK-LABEL: test_mm256_broadcast_ps - // CHECK: call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %{{.*}}) + // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> return _mm256_broadcast_ps(A); } ___ 
cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
This revision was automatically updated to reflect the committed changes. Closed by commit rL276102: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using… (authored by RKSimon). Changed prior to commit: https://reviews.llvm.org/D22105?vs=64534=64653#toc Repository: rL LLVM https://reviews.llvm.org/D22105 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse-builtins.c cfe/trunk/test/CodeGen/sse2-builtins.c Index: cfe/trunk/lib/Headers/xmmintrin.h === --- cfe/trunk/lib/Headers/xmmintrin.h +++ cfe/trunk/lib/Headers/xmmintrin.h @@ -1350,7 +1350,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_si32(__m128 __a) { - return __a[0]; + return __builtin_ia32_cvttss2si((__v4sf)__a); } /// \brief Converts a float value contained in the lower 32 bits of a vector of @@ -1386,7 +1386,7 @@ static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttss_si64(__m128 __a) { - return __a[0]; + return __builtin_ia32_cvttss2si64((__v4sf)__a); } /// \brief Converts two low-order float values in a 128-bit vector of Index: cfe/trunk/lib/Headers/avxintrin.h === --- cfe/trunk/lib/Headers/avxintrin.h +++ cfe/trunk/lib/Headers/avxintrin.h @@ -2117,7 +2117,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { - return (__m128i)__builtin_convertvector((__v4df) __a, __v4si); + return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); } static __inline __m128i __DEFAULT_FN_ATTRS @@ -2129,7 +2129,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { - return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si); + return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); } static __inline double __DEFAULT_FN_ATTRS Index: cfe/trunk/lib/Headers/emmintrin.h === --- cfe/trunk/lib/Headers/emmintrin.h +++ 
cfe/trunk/lib/Headers/emmintrin.h @@ -417,8 +417,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b) { - __a[0] = __b[0]; - return __a; + return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -444,7 +443,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { - return __a[0]; + return __builtin_ia32_cvttsd2si((__v2df)__a); } static __inline__ __m64 __DEFAULT_FN_ATTRS @@ -1707,7 +1706,7 @@ static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { - return __a[0]; + return __builtin_ia32_cvttsd2si64((__v2df)__a); } #endif @@ -1755,7 +1754,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { - return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si); + return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); } /// \brief Returns a vector of [4 x i32] where the lowest element is the input Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def === --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -303,7 +303,9 @@ TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "", "sse") TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "", "sse") TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse") @@ -328,8 +330,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, 
"LLiV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2") @@ -455,7 +461,9 @@ TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256,
r276102 - [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
Author: rksimon Date: Wed Jul 20 05:18:01 2016 New Revision: 276102 URL: http://llvm.org/viewvc/llvm-project?rev=276102=rev Log: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR D20859 and D20860 attempted to replace the SSE (V)CVTTPS2DQ and VCVTTPD2DQ truncating conversions with generic IR instead. It turns out that the behaviour of these intrinsics is different enough from generic IR that this will cause problems, INF/NAN/out of range values are guaranteed to result in a 0x8000 value - which plays havoc with constant folding which converts them to either zero or UNDEF. This is also an issue with the scalar implementations (which were already generic IR and what I was trying to match). This patch changes both scalar and packed versions back to using x86-specific builtins. It also deals with the other scalar conversion cases that are runtime rounding mode dependent and can have similar issues with constant folding. Differential Revision: https://reviews.llvm.org/D22105 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse-builtins.c cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=276102=276101=276102=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jul 20 05:18:01 2016 @@ -303,7 +303,9 @@ TARGET_BUILTIN(__builtin_ia32_pabsd128, TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "", "sse") TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "", "sse") TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "", "sse") 
+TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse") @@ -328,8 +330,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2") @@ -455,7 +461,9 @@ TARGET_BUILTIN(__builtin_ia32_cmpss, "V4 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx") Modified: cfe/trunk/lib/Headers/avxintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=276102=276101=276102=diff == --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Wed Jul 20 05:18:01 2016 @@ -2117,7 
+2117,7 @@ _mm256_cvtps_pd(__m128 __a) static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { - return (__m128i)__builtin_convertvector((__v4df) __a, __v4si); + return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); } static __inline __m128i __DEFAULT_FN_ATTRS @@ -2129,7 +2129,7 @@ _mm256_cvtpd_epi32(__m256d __a) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { - return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si); + return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); } static __inline double __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=276102=276101=276102=diff
Re: [PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
RKSimon updated this revision to Diff 64534. RKSimon added a comment. Removed sitofp conversion changes Repository: rL LLVM https://reviews.llvm.org/D22105 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avxintrin.h lib/Headers/emmintrin.h lib/Headers/xmmintrin.h test/CodeGen/avx-builtins.c test/CodeGen/builtins-x86.c test/CodeGen/sse-builtins.c test/CodeGen/sse2-builtins.c Index: test/CodeGen/sse2-builtins.c === --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -507,7 +507,7 @@ __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) { // CHECK-LABEL: test_mm_cvtsd_ss - // CHECK: fptrunc double %{{.*}} to float + // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}}) return _mm_cvtsd_ss(A, B); } @@ -569,21 +569,19 @@ __m128i test_mm_cvttps_epi32(__m128 A) { // CHECK-LABEL: test_mm_cvttps_epi32 - // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32> + // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}}) return _mm_cvttps_epi32(A); } int test_mm_cvttsd_si32(__m128d A) { // CHECK-LABEL: test_mm_cvttsd_si32 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fptosi double %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}}) return _mm_cvttsd_si32(A); } long long test_mm_cvttsd_si64(__m128d A) { // CHECK-LABEL: test_mm_cvttsd_si64 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fptosi double %{{.*}} to i64 + // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}}) return _mm_cvttsd_si64(A); } Index: test/CodeGen/sse-builtins.c === --- test/CodeGen/sse-builtins.c +++ test/CodeGen/sse-builtins.c @@ -295,22 +295,19 @@ int test_mm_cvtt_ss2si(__m128 A) { // CHECK-LABEL: test_mm_cvtt_ss2si - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) return _mm_cvtt_ss2si(A); } int test_mm_cvttss_si32(__m128 A) { // CHECK-LABEL: 
test_mm_cvttss_si32 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) return _mm_cvttss_si32(A); } long long test_mm_cvttss_si64(__m128 A) { // CHECK-LABEL: test_mm_cvttss_si64 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i64 + // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}}) return _mm_cvttss_si64(A); } Index: test/CodeGen/builtins-x86.c === --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -287,12 +287,14 @@ tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i); tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f); tmp_i = __builtin_ia32_cvtss2si(tmp_V4f); + tmp_i = __builtin_ia32_cvttss2si(tmp_V4f); tmp_i = __builtin_ia32_rdtsc(); tmp_i = __builtin_ia32_rdtscp(_Ui); tmp_LLi = __builtin_ia32_rdpmc(tmp_i); #ifdef USE_64 tmp_LLi = __builtin_ia32_cvtss2si64(tmp_V4f); + tmp_LLi = __builtin_ia32_cvttss2si64(tmp_V4f); #endif tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f); (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp); @@ -328,10 +330,14 @@ tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d); tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i); tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d); + tmp_i = __builtin_ia32_cvttsd2si(tmp_V2d); + tmp_V4f = __builtin_ia32_cvtsd2ss(tmp_V4f, tmp_V2d); #ifdef USE_64 tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); + tmp_LLi = __builtin_ia32_cvttsd2si64(tmp_V2d); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); + tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); (void) __builtin_ia32_mfence(); @@ -410,7 +416,9 @@ tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); + tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); + tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); tmp_V4d = 
__builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7); tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7); tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7); Index: test/CodeGen/avx-builtins.c === --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -286,13 +286,13 @@ __m128i test_mm256_cvttpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvttpd_epi32 - // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32> + // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}}) return
Re: [PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
RKSimon added a comment. In https://reviews.llvm.org/D22105#488566, @eli.friedman wrote: > The x86-specific operation is affected by the rounding mode... but so is a C > cast. This is specified by Annex F in the C standard. > > Of course, you're going to end up with undefined behavior if you actually > modify the rounding mode because LLVM and clang don't support FENV_ACCESS at > the moment. OK I'm going to pull the sitofp conversions from this patch - I have other concerns about them (i.e. we don't treat scalar + vector the same) that will need to be looked at as well. Repository: rL LLVM https://reviews.llvm.org/D22105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
RKSimon added a comment. In https://reviews.llvm.org/D22105#488513, @eli.friedman wrote: > I don't think we need to use x86-specific operations for sitofp-like > conversions; the C cast is equivalent given that a 32 or 64-bit integer is > always in within the range of a 32-bit float. I think the only situation in which a lossless conversion occurs is i32->f64; every other sitofp conversion could be affected by the rounding control, no? Repository: rL LLVM https://reviews.llvm.org/D22105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r274799 - Update switch statement to match coding standards.
Author: rksimon Date: Thu Jul 7 17:32:26 2016 New Revision: 274799 URL: http://llvm.org/viewvc/llvm-project?rev=274799=rev Log: Update switch statement to match coding standards. Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=274799=274798=274799=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Thu Jul 7 17:32:26 2016 @@ -1777,7 +1777,7 @@ public: // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { switch (GPU) { -default: +case CudaArch::UNKNOWN: assert(false && "No GPU arch when compiling CUDA device code."); return ""; case CudaArch::SM_20: @@ -1805,6 +1805,7 @@ public: case CudaArch::SM_62: return "620"; } +llvm_unreachable("unhandled CudaArch"); }(); Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
RKSimon created this revision. RKSimon added reviewers: eli.friedman, mkuper, craig.topper, spatel, andreadb. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. D20859 and D20860 attempted to replace the SSE (V)CVTTPS2DQ and VCVTTPD2DQ truncating conversions with generic IR instead. It turns out that the behaviour of these intrinsics is different enough from generic IR that this will cause problems: INF/NaN/out-of-range values are guaranteed to result in a 0x80000000 value — which plays havoc with constant folding, which converts them to either zero or UNDEF. This is also an issue with the scalar implementations (which were already generic IR and what I was trying to match). This patch changes both scalar and packed versions back to using x86-specific builtins. It also deals with the other scalar conversion cases that are runtime rounding mode dependent and can have similar issues with constant folding. A companion llvm patch will be submitted shortly. 
Repository: rL LLVM http://reviews.llvm.org/D22105 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avxintrin.h lib/Headers/emmintrin.h lib/Headers/xmmintrin.h test/CodeGen/avx-builtins.c test/CodeGen/avx512f-builtins.c test/CodeGen/builtins-x86.c test/CodeGen/sse-builtins.c test/CodeGen/sse2-builtins.c Index: test/CodeGen/sse2-builtins.c === --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -507,7 +507,7 @@ __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) { // CHECK-LABEL: test_mm_cvtsd_ss - // CHECK: fptrunc double %{{.*}} to float + // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}}) return _mm_cvtsd_ss(A, B); } @@ -541,8 +541,7 @@ __m128d test_mm_cvtsi64_sd(__m128d A, long long B) { // CHECK-LABEL: test_mm_cvtsi64_sd - // CHECK: sitofp i64 %{{.*}} to double - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 + // CHECK: call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %{{.*}}, i64 %{{.*}}) return _mm_cvtsi64_sd(A, B); } @@ -569,21 +568,19 @@ __m128i test_mm_cvttps_epi32(__m128 A) { // CHECK-LABEL: test_mm_cvttps_epi32 - // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32> + // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}}) return _mm_cvttps_epi32(A); } int test_mm_cvttsd_si32(__m128d A) { // CHECK-LABEL: test_mm_cvttsd_si32 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fptosi double %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}}) return _mm_cvttsd_si32(A); } long long test_mm_cvttsd_si64(__m128d A) { // CHECK-LABEL: test_mm_cvttsd_si64 - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: fptosi double %{{.*}} to i64 + // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}}) return _mm_cvttsd_si64(A); } Index: test/CodeGen/sse-builtins.c === --- test/CodeGen/sse-builtins.c +++ test/CodeGen/sse-builtins.c @@ -263,15 +263,13 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) { // 
CHECK-LABEL: test_mm_cvtsi32_ss - // CHECK: sitofp i32 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %{{.*}}, i32 %{{.*}}) return _mm_cvtsi32_ss(A, B); } __m128 test_mm_cvtsi64_ss(__m128 A, long long B) { // CHECK-LABEL: test_mm_cvtsi64_ss - // CHECK: sitofp i64 %{{.*}} to float - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + // CHECK: call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %{{.*}}, i64 %{{.*}}) return _mm_cvtsi64_ss(A, B); } @@ -295,22 +293,19 @@ int test_mm_cvtt_ss2si(__m128 A) { // CHECK-LABEL: test_mm_cvtt_ss2si - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) return _mm_cvtt_ss2si(A); } int test_mm_cvttss_si32(__m128 A) { // CHECK-LABEL: test_mm_cvttss_si32 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i32 + // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) return _mm_cvttss_si32(A); } long long test_mm_cvttss_si64(__m128 A) { // CHECK-LABEL: test_mm_cvttss_si64 - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: fptosi float %{{.*}} to i64 + // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}}) return _mm_cvttss_si64(A); } Index: test/CodeGen/builtins-x86.c === --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -286,13 +286,17 @@ tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i); tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f); + tmp_V4f =
r274748 - Fix "not all control paths return a value" warning on MSVC
Author: rksimon Date: Thu Jul 7 06:24:38 2016 New Revision: 274748 URL: http://llvm.org/viewvc/llvm-project?rev=274748=rev Log: Fix "not all control paths return a value" warning on MSVC This time without causing a 'all enums handled' warning on other compilers. Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=274748=274747=274748=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Thu Jul 7 06:24:38 2016 @@ -1777,7 +1777,7 @@ public: // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { switch (GPU) { -case CudaArch::UNKNOWN: +default: assert(false && "No GPU arch when compiling CUDA device code."); return ""; case CudaArch::SM_20: @@ -1804,8 +1804,6 @@ public: return "610"; case CudaArch::SM_62: return "620"; - default: - llvm_unreachable("unhandled CudaArch"); } }(); Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r274746 - Fix "not all control paths return a value" warning on MSVC
Author: rksimon Date: Thu Jul 7 06:12:02 2016 New Revision: 274746 URL: http://llvm.org/viewvc/llvm-project?rev=274746&view=rev Log: Fix "not all control paths return a value" warning on MSVC Modified: cfe/trunk/lib/Basic/Targets.cpp Modified: cfe/trunk/lib/Basic/Targets.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=274746&r1=274745&r2=274746&view=diff == --- cfe/trunk/lib/Basic/Targets.cpp (original) +++ cfe/trunk/lib/Basic/Targets.cpp Thu Jul 7 06:12:02 2016 @@ -1804,6 +1804,8 @@ public: return "610"; case CudaArch::SM_62: return "620"; + default: + llvm_unreachable("unhandled CudaArch"); } }(); Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r274554 - [X86][AVX512] Remove vector BROADCAST builtins.
Author: rksimon Date: Tue Jul 5 09:49:31 2016 New Revision: 274554 URL: http://llvm.org/viewvc/llvm-project?rev=274554=rev Log: [X86][AVX512] Remove vector BROADCAST builtins. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=274554=274553=274554=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Jul 5 09:49:31 2016 @@ -996,8 +996,6 @@ TARGET_BUILTIN(__builtin_ia32_pmuldq512_ TARGET_BUILTIN(__builtin_ia32_pmuludq512_mask, "V8LLiV16iV16iV8LLiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pbroadcastd512, "V16iV4iV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_pbroadcastq512, "V8LLiV2LLiV8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "", "avx512f") @@ -1908,8 +1906,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcastf TARGET_BUILTIN(__builtin_ia32_broadcastf64x4_512, "V8dV4dV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_512, "V16iV4iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_broadcasti64x4_512, "V8LLiV4LLiV8LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_broadcastsd512, "V8dV2dV8dUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_broadcastss512, "V16fV4fV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_broadcastmb128, "V2LLiUc","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl") @@ -1927,19 +1923,6 @@ 
TARGET_BUILTIN(__builtin_ia32_broadcasti TARGET_BUILTIN(__builtin_ia32_broadcasti64x2_256_mask, "V4LLiV2LLiV4LLiUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastf32x4_256_mask, "V8fV4fV8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_256_mask, "V8iV4iV8iUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastsd256_mask, "V4dV2dV4dUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastss128_mask, "V4fV4fV4fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastss256_mask, "V8fV4fV8fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_mask, "V32sV8sV32sUi","","avx512bw") -TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_mask, "V16cV16cV16cUs","","avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_mask, "V32cV16cV32cUi","","avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_mask, "V8sV8sV8sUc","","avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_mask, "V16sV8sV16sUs","","avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pbroadcastd128_mask, "V4iV4iV4iUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastd256_mask, "V8iV4iV8iUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_mask, "V2LLiV2LLiV2LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_mask, "V4LLiV2LLiV4LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_mask, "V64cV16cV64cULLi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r274544 - [X86][AVX512] Converted the VBROADCAST intrinsics to generic IR
Author: rksimon Date: Tue Jul 5 07:59:33 2016 New Revision: 274544 URL: http://llvm.org/viewvc/llvm-project?rev=274544=rev Log: [X86][AVX512] Converted the VBROADCAST intrinsics to generic IR Modified: cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlbwintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=274544=274543=274544=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Tue Jul 5 07:59:33 2016 @@ -2266,25 +2266,28 @@ _mm512_movm_epi16 (__mmask32 __A) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastb_epi8 (__m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, - (__v64qi) _mm512_setzero_si512(), - (__mmask64) -1); + return (__m512i)__builtin_shufflevector((__v16qi) __A, + (__v16qi)_mm_undefined_si128(), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, - (__v64qi) __O, - __M); + return (__m512i)__builtin_ia32_selectb_512(__M, + (__v64qi) _mm512_broadcastb_epi8(__A), + (__v64qi) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, - (__v64qi) _mm512_setzero_qi(), - __M); + return (__m512i)__builtin_ia32_selectb_512(__M, + (__v64qi) 
_mm512_broadcastb_epi8(__A), + (__v64qi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -2306,25 +2309,26 @@ _mm512_maskz_set1_epi16 (__mmask32 __M, static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastw_epi16 (__m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, - (__v32hi) _mm512_setzero_si512(), - (__mmask32) -1); + return (__m512i)__builtin_shufflevector((__v8hi) __A, + (__v8hi)_mm_undefined_si128(), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, - (__v32hi) __O, - __M); + return (__m512i)__builtin_ia32_selectw_512(__M, + (__v32hi) _mm512_broadcastw_epi16(__A), + (__v32hi) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, - (__v32hi) _mm512_setzero_hi(), - __M); + return (__m512i)__builtin_ia32_selectw_512(__M, + (__v32hi) _mm512_broadcastw_epi16(__A), + (__v32hi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274544=274543=274544=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Jul 5 07:59:33 2016 @@ -195,54 +195,54 @@ _mm512_undefined_epi32(void) { return (__m512i)__builtin_ia32_undef512(); } + static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastd_epi32 (__m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, - (__v16si) -
r274523 - [X86][AVX512] Converted the VSHUFPD intrinsics to generic IR
Author: rksimon Date: Mon Jul 4 16:30:47 2016 New Revision: 274523 URL: http://llvm.org/viewvc/llvm-project?rev=274523=rev Log: [X86][AVX512] Converted the VSHUFPD intrinsics to generic IR Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274523=274522=274523=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Jul 4 16:30:47 2016 @@ -5950,6 +5950,7 @@ _mm512_kmov (__mmask16 __A) #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) + static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, __mmask16 __U, __m512i __B) @@ -7166,23 +7167,27 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m12 (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U)); }) -#define _mm512_shuffle_pd(M, V, imm) __extension__ ({ \ - (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ - (__v8df)(__m512d)(V), (int)(imm), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1); }) +#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \ + (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (((M) & 0x01) >> 0) + 0, \ + (((M) & 0x02) >> 1) + 8, \ + (((M) & 0x04) >> 2) + 2, \ + (((M) & 0x08) >> 3) + 10, \ + (((M) & 0x10) >> 4) + 4, \ + (((M) & 0x20) >> 5) + 12, \ + (((M) & 0x40) >> 6) + 6, \ + (((M) & 0x80) >> 7) + 14); }) -#define _mm512_mask_shuffle_pd(W, U, M, V, imm) __extension__ ({ \ - (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ - (__v8df)(__m512d)(V), (int)(imm), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U)); }) +#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), 
\ + (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ + (__v8df)(__m512d)(W)); }) -#define _mm512_maskz_shuffle_pd(U, M, V, imm) __extension__ ({ \ - (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ - (__v8df)(__m512d)(V), (int)(imm), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U)); }) +#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ + (__v8df)_mm512_setzero_pd()); }) #define _mm512_shuffle_ps(M, V, imm) __extension__ ({ \ (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \ Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=274523=274522=274523=diff == --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Mon Jul 4 16:30:47 2016 @@ -7374,51 +7374,45 @@ _mm256_maskz_sra_epi64 (__mmask8 __U, __ (__v4di)_mm256_setzero_si256(), \ (__mmask8)(U)); }) -#define _mm_mask_shuffle_pd(W, U, A, B, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_shufpd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(imm), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) - -#define _mm_maskz_shuffle_pd(U, A, B, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_shufpd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) - -#define _mm256_mask_shuffle_pd(W, U, A, B, imm) __extension__ ({ \ -
r274502 - [X86][AVX512] Converted the VPERMPD/VPERMQ intrinsics to generic IR
Author: rksimon Date: Mon Jul 4 08:34:44 2016 New Revision: 274502 URL: http://llvm.org/viewvc/llvm-project?rev=274502=rev Log: [X86][AVX512] Converted the VPERMPD/VPERMQ intrinsics to generic IR Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274502=274501=274502=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Jul 4 08:34:44 2016 @@ -8678,35 +8678,49 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128 -(__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R)); }) -#define _mm512_permutex_pd(X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1); }) - -#define _mm512_mask_permutex_pd(W, U, X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U)); }) - -#define _mm512_maskz_permutex_pd(U, X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U)); }) - -#define _mm512_permutex_epi64(X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)_mm512_undefined_epi32(), \ - (__mmask8)-1); }) - -#define _mm512_mask_permutex_epi64(W, M, X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(M)); }) - -#define _mm512_maskz_permutex_epi64(M, X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(M)); }) +#define _mm512_permutex_pd(X, C) __extension__ ({ \ + 
(__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ + (__v8df)_mm512_undefined_pd(), \ + 0 + (((C) & 0x03) >> 0), \ + 0 + (((C) & 0x0c) >> 2), \ + 0 + (((C) & 0x30) >> 4), \ + 0 + (((C) & 0xc0) >> 6), \ + 4 + (((C) & 0x03) >> 0), \ + 4 + (((C) & 0x0c) >> 2), \ + 4 + (((C) & 0x30) >> 4), \ + 4 + (((C) & 0xc0) >> 6)); }) + +#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permutex_pd((X), (C)), \ + (__v8df)(__m512d)(W)); }) + +#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permutex_pd((X), (C)), \ + (__v8df)_mm512_setzero_pd()); }) + +#define _mm512_permutex_epi64(X, C) __extension__ ({ \ + (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ + (__v8di)_mm512_undefined_epi32(), \ + 0 + (((C) & 0x03) >> 0), \ + 0 + (((C) & 0x0c) >> 2), \ + 0 + (((C) & 0x30) >> 4), \ + 0 + (((C) & 0xc0) >> 6), \ + 4 + (((C) & 0x03) >> 0), \ + 4 + (((C) & 0x0c) >> 2), \ + 4 + (((C) & 0x30) >> 4), \ + 4 + (((C) & 0xc0) >> 6)); }) + +#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_permutex_epi64((X), (C)), \ + (__v8di)(__m512i)(W)); }) + +#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_permutex_epi64((X), (C)), \ +
r274492 - [X86][AVX512] Converted the VPERMILPD/VPERMILPS intrinsics to generic IR
Author: rksimon Date: Mon Jul 4 06:06:15 2016 New Revision: 274492 URL: http://llvm.org/viewvc/llvm-project?rev=274492=rev Log: [X86][AVX512] Converted the VPERMILPD/VPERMILPS intrinsics to generic IR Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274492=274491=274492=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Jul 4 06:06:15 2016 @@ -6540,34 +6540,56 @@ _mm512_mask2_permutex2var_epi64 (__m512i } #define _mm512_permute_pd(X, C) __extension__ ({ \ - (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ -(__v8df)_mm512_undefined_pd(), \ -(__mmask8)-1); }) + (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ + (__v8df)_mm512_setzero_pd(), \ + 0 + (((C) & 0x01) >> 0), \ + 0 + (((C) & 0x02) >> 1), \ + 2 + (((C) & 0x04) >> 2), \ + 2 + (((C) & 0x08) >> 3), \ + 4 + (((C) & 0x10) >> 4), \ + 4 + (((C) & 0x20) >> 5), \ + 6 + (((C) & 0x40) >> 6), \ + 6 + (((C) & 0x80) >> 7)); }) #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ - (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ -(__v8df)(__m512d)(W), \ -(__mmask8)(U)); }) + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permute_pd((X), (C)), \ + (__v8df)(__m512d)(W)); }) #define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \ - (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ -(__v8df)_mm512_setzero_pd(), \ -(__mmask8)(U)); }) + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permute_pd((X), (C)), \ + (__v8df)_mm512_setzero_pd()); }) #define _mm512_permute_ps(X, C) __extension__ ({ \ - (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1); }) + 
(__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ + (__v16sf)_mm512_setzero_ps(), \ + 0 + (((C) & 0x03) >> 0), \ + 0 + (((C) & 0x0c) >> 2), \ + 0 + (((C) & 0x30) >> 4), \ + 0 + (((C) & 0xc0) >> 6), \ + 4 + (((C) & 0x03) >> 0), \ + 4 + (((C) & 0x0c) >> 2), \ + 4 + (((C) & 0x30) >> 4), \ + 4 + (((C) & 0xc0) >> 6), \ + 8 + (((C) & 0x03) >> 0), \ + 8 + (((C) & 0x0c) >> 2), \ + 8 + (((C) & 0x30) >> 4), \ + 8 + (((C) & 0xc0) >> 6), \ + 12 + (((C) & 0x03) >> 0), \ + 12 + (((C) & 0x0c) >> 2), \ + 12 + (((C) & 0x30) >> 4), \ + 12 + (((C) & 0xc0) >> 6)); }) #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ - (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U)); }) + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_permute_ps((X), (C)), \ + (__v16sf)(__m512)(W)); }) #define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \ - (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U)); }) + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_permute_ps((X), (C)), \ + (__v16sf)_mm512_setzero_ps()); })
r274442 - [X86][AVX512] Converted the MOVDDUP/MOVSLDUP/MOVSHDUP masked intrinsics to generic IR
Author: rksimon Date: Sat Jul 2 12:16:25 2016 New Revision: 274442 URL: http://llvm.org/viewvc/llvm-project?rev=274442=rev Log: [X86][AVX512] Converted the MOVDDUP/MOVSLDUP/MOVSHDUP masked intrinsics to generic IR llvm companion patch imminent Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=274442=274441=274442=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat Jul 2 12:16:25 2016 @@ -1668,9 +1668,6 @@ TARGET_BUILTIN(__builtin_ia32_movdqa64lo TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movddup512_mask, "V8dV8dV8dUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movddup128_mask, "V2dV2dV2dUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_movddup256_mask, "V4dV4dV4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_gpr_mask, "V64ccV64cULLi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_gpr_mask, "V16ccV16cUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_gpr_mask, "V32ccV32cUi","","avx512bw,avx512vl") @@ -2122,12 +2119,6 @@ TARGET_BUILTIN(__builtin_ia32_compresssf TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movshdup512_mask, "V16fV16fV16fUs","","avx512f") 
-TARGET_BUILTIN(__builtin_ia32_movsldup512_mask, "V16fV16fV16fUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movshdup128_mask, "V4fV4fV4fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_movshdup256_mask, "V8fV8fV8fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_movsldup128_mask, "V4fV4fV4fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_movsldup256_mask, "V8fV8fV8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f") Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274442=274441=274442=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sat Jul 2 12:16:25 2016 @@ -5572,32 +5572,27 @@ _mm512_mask_store_epi64 (void *__P, __mm (__mmask8) __U); } - - static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_movedup_pd (__m512d __A) { - return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1); + return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, + 0, 0, 2, 2, 4, 4, 6, 6); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, - (__v8df) __W, - (__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_movedup_pd(__A), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_movedup_pd(__A), + (__v8df)_mm512_setzero_pd()); } #define 
_mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \ @@ -8988,53 +8983,47 @@ _mm512_maskz_compress_epi32 (__mmask16 _ static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_movehdup_ps (__m512 __A) { - return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1); + return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, + 1, 1, 3, 3,
r274126 - [X86][SSE2] Updated tests to match llvm\test\CodeGen\X86\sse2-intrinsics-fast-isel-x86_64.ll
Author: rksimon Date: Wed Jun 29 09:04:08 2016 New Revision: 274126 URL: http://llvm.org/viewvc/llvm-project?rev=274126=rev Log: [X86][SSE2] Updated tests to match llvm\test\CodeGen\X86\sse2-intrinsics-fast-isel-x86_64.ll Modified: cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/test/CodeGen/sse2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=274126=274125=274126=diff == --- cfe/trunk/test/CodeGen/sse2-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse2-builtins.c Wed Jun 29 09:04:08 2016 @@ -701,6 +701,14 @@ __m128i test_mm_loadu_si128(__m128i cons return _mm_loadu_si128(A); } +__m128i test_mm_loadu_si64(void const* A) { + // CHECK-LABEL: test_mm_loadu_si64 + // CHECK: load i64, i64* %{{.*}}, align 1{{$}} + // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 + // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1 + return _mm_loadu_si64(A); +} + __m128i test_mm_madd_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_madd_epi16 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1532,12 +1540,3 @@ __m128i test_mm_xor_si128(__m128i A, __m // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} return _mm_xor_si128(A, B); } - -__m128i test_mm_loadu_si64(void const* A) { - // CHECK-LABEL: test_mm_loadu_si64 - // CHECK: load i64, i64* %{{.*}}, align 1{{$}} - // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 - // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1 - return _mm_loadu_si64(A); -} - ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21504: [X86] add _mm_loadu_si64
RKSimon added inline comments. Comment at: tools/clang/test/CodeGen/sse2-builtins.c:1526 @@ +1525,3 @@ + // CHECK-LABEL: test_mm_loadu_si64 + // CHECK: load i64, i64* %__u + // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 Please can you add the alignment operand to the CHECK (it should be align 1)? Repository: rL LLVM http://reviews.llvm.org/D21504 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)
RKSimon accepted this revision. RKSimon added a comment. This revision is now accepted and ready to land. LGTM - the compile warning is clear and it could be a problem if we allow undefined values through on pre-AVX targets. The only other thing we could do is handle these in CGBuiltin and 'accept' 0-7 values through on sse/sse2 targets and assert on other values but I don't see how this would be better. http://reviews.llvm.org/D21306 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21373: [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang.
RKSimon accepted this revision. RKSimon added a comment. This revision is now accepted and ready to land. LGTM Comment at: lib/Headers/bmiintrin.h:284 @@ -283,2 +283,3 @@ ///bits in the operand. + static __inline__ unsigned int __RELAXED_FN_ATTRS Why the newlines? It doesn't match the rest of the header. Comment at: lib/Headers/bmiintrin.h:296 @@ +295,3 @@ +/// This intrinsic corresponds to the \c TZCNT instruction. +/// +/// \param __X m_zuckerman wrote: > We can't use #define here. The __mm_tzcnt_32(a) intrinsic is different from > __tzcnt_u32 in the return value. The __mm_tzcnt_32 intrinsic returns a signed int > while __tzcnt_u32 returns an unsigned value. Ah! Missed that bit - that's fine. http://reviews.llvm.org/D21373 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21504: [X86] add _mm_loadu_si64
RKSimon added a subscriber: RKSimon. Comment at: tools/clang/test/CodeGen/sse2-builtins.c:1527 @@ +1526,3 @@ + // CHECK: load i64, i64* %__u + // CHECK: insertelement <2 x i64> undef, i64 %4, i32 0 + // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1 Replace the hardcoded %4 argument with a general pattern match Comment at: tools/clang/test/CodeGen/sse2-builtins.c:1530 @@ +1529,3 @@ + // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + return _mm_loadu_si64(A); Is the store/load necessary? This appears to be just the -O0 stack behaviour Repository: rL LLVM http://reviews.llvm.org/D21504 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r273090 - [X86][XOP] Refreshed builtin tests ready for creation of llvm fast-isel tests
Author: rksimon Date: Sat Jun 18 13:20:14 2016 New Revision: 273090 URL: http://llvm.org/viewvc/llvm-project?rev=273090=rev Log: [X86][XOP] Refreshed builtin tests ready for creation of llvm fast-isel tests Modified: cfe/trunk/test/CodeGen/xop-builtins.c Modified: cfe/trunk/test/CodeGen/xop-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/xop-builtins.c?rev=273090=273089=273090=diff == --- cfe/trunk/test/CodeGen/xop-builtins.c (original) +++ cfe/trunk/test/CodeGen/xop-builtins.c Sat Jun 18 13:20:14 2016 @@ -1,390 +1,393 @@ // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Werror | FileCheck %s +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s // Don't include mm_malloc.h, it's system specific. #define __MM_MALLOC_H #include +// NOTE: This should match the tests in llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll + __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_maccs_epi16 - // CHECK: @llvm.x86.xop.vpmacssww + // CHECK: call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_maccs_epi16(a, b, c); } __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_macc_epi16 - // CHECK: @llvm.x86.xop.vpmacsww + // CHECK: call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_macc_epi16(a, b, c); } __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_maccsd_epi16 - // CHECK: @llvm.x86.xop.vpmacsswd + // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}}) return _mm_maccsd_epi16(a, b, c); } __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_maccd_epi16 - // CHECK: @llvm.x86.xop.vpmacswd + // CHECK: call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> 
%{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}}) return _mm_maccd_epi16(a, b, c); } __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_maccs_epi32 - // CHECK: @llvm.x86.xop.vpmacssdd + // CHECK: call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_maccs_epi32(a, b, c); } __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_macc_epi32 - // CHECK: @llvm.x86.xop.vpmacsdd + // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_macc_epi32(a, b, c); } __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_maccslo_epi32 - // CHECK: @llvm.x86.xop.vpmacssdql + // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}) return _mm_maccslo_epi32(a, b, c); } __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_macclo_epi32 - // CHECK: @llvm.x86.xop.vpmacsdql + // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}) return _mm_macclo_epi32(a, b, c); } __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_maccshi_epi32 - // CHECK: @llvm.x86.xop.vpmacssdqh + // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}) return _mm_maccshi_epi32(a, b, c); } __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_macchi_epi32 - // CHECK: @llvm.x86.xop.vpmacsdqh + // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}) return _mm_macchi_epi32(a, b, c); } __m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_maddsd_epi16 - // CHECK: @llvm.x86.xop.vpmadcsswd + // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x 
i32> %{{.*}}) return _mm_maddsd_epi16(a, b, c); } __m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_maddd_epi16 - // CHECK: @llvm.x86.xop.vpmadcswd + // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}}) return _mm_maddd_epi16(a, b, c); } __m128i test_mm_haddw_epi8(__m128i a) { // CHECK-LABEL: test_mm_haddw_epi8 - // CHECK: @llvm.x86.xop.vphaddbw + // CHECK: call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %{{.*}}) return _mm_haddw_epi8(a); } __m128i test_mm_haddd_epi8(__m128i a) { // CHECK-LABEL: test_mm_haddd_epi8 - // CHECK: @llvm.x86.xop.vphaddbd + // CHECK: call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %{{.*}}) return
r273086 - [X86][TBM] Refreshed builtin tests ready for creation of llvm fast-isel tests
Author: rksimon Date: Sat Jun 18 12:09:40 2016 New Revision: 273086 URL: http://llvm.org/viewvc/llvm-project?rev=273086=rev Log: [X86][TBM] Refreshed builtin tests ready for creation of llvm fast-isel tests Modified: cfe/trunk/test/CodeGen/tbm-builtins.c Modified: cfe/trunk/test/CodeGen/tbm-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/tbm-builtins.c?rev=273086=273085=273086=diff == --- cfe/trunk/test/CodeGen/tbm-builtins.c (original) +++ cfe/trunk/test/CodeGen/tbm-builtins.c Sat Jun 18 12:09:40 2016 @@ -8,46 +8,56 @@ #include +// NOTE: This should match the tests in llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll + unsigned int test__bextri_u32(unsigned int a) { - // CHECK: call i32 @llvm.x86.tbm.bextri.u32 + // CHECK-LABEL: test__bextri_u32 + // CHECK: call i32 @llvm.x86.tbm.bextri.u32(i32 %{{.*}}, i32 1) return __bextri_u32(a, 1); } unsigned long long test__bextri_u64(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.bextri.u64 + // CHECK-LABEL: test__bextri_u64 + // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2) return __bextri_u64(a, 2); } unsigned long long test__bextri_u64_bigint(unsigned long long a) { - // CHECK: call i64 @llvm.x86.tbm.bextri.u64 + // CHECK-LABEL: test__bextri_u64_bigint + // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887) return __bextri_u64(a, 0x7fLL); } unsigned int test__blcfill_u32(unsigned int a) { + // CHECK-LABEL: test__blcfill_u32 // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 // CHECK-NEXT: %{{.*}} = and i32 [[TMP]], [[SRC]] return __blcfill_u32(a); } unsigned long long test__blcfill_u64(unsigned long long a) { + // CHECK-LABEL: test__blcfill_u64 // CHECK: [[TMPT:%.*]] = add i64 [[SRC:%.*]], 1 // CHECK-NEXT: %{{.*}} = and i64 [[TMP]], [[SRC]] return __blcfill_u64(a); } unsigned int test__blci_u32(unsigned int a) { + // CHECK-LABEL: test__blci_u32 // CHECK: [[TMP:%.*]] = sub i32 -2, [[SRC:%.*]] // CHECK-NEXT: %{{.*}} = or i32 [[TMP]], [[SRC]] return 
__blci_u32(a); } unsigned long long test__blci_u64(unsigned long long a) { + // CHECK-LABEL: test__blci_u64 // CHECK: [[TMP:%.*]] = sub i64 -2, [[SRC:%.*]] // CHECK-NEXT: %{{.*}} = or i64 [[TMP]], [[SRC]] return __blci_u64(a); } unsigned int test__blcic_u32(unsigned int a) { + // CHECK-LABEL: test__blcic_u32 // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC]], 1 // CHECK-NEXT: {{.*}} = and i32 [[TMP2]], [[TMP1]] @@ -55,6 +65,7 @@ unsigned int test__blcic_u32(unsigned in } unsigned long long test__blcic_u64(unsigned long long a) { + // CHECK-LABEL: test__blcic_u64 // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC]], 1 // CHECK-NEXT: {{.*}} = and i64 [[TMP2]], [[TMP1]] @@ -62,42 +73,49 @@ unsigned long long test__blcic_u64(unsig } unsigned int test__blcmsk_u32(unsigned int a) { + // CHECK-LABEL: test__blcmsk_u32 // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 // CHECK-NEXT: {{.*}} = xor i32 [[TMP]], [[SRC]] return __blcmsk_u32(a); } unsigned long long test__blcmsk_u64(unsigned long long a) { + // CHECK-LABEL: test__blcmsk_u64 // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1 // CHECK-NEXT: {{.*}} = xor i64 [[TMP]], [[SRC]] return __blcmsk_u64(a); } unsigned int test__blcs_u32(unsigned int a) { + // CHECK-LABEL: test__blcs_u32 // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 // CHECK-NEXT: {{.*}} = or i32 [[TMP]], [[SRC]] return __blcs_u32(a); } unsigned long long test__blcs_u64(unsigned long long a) { + // CHECK-LABEL: test__blcs_u64 // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1 // CHECK-NEXT: {{.*}} = or i64 [[TMP]], [[SRC]] return __blcs_u64(a); } unsigned int test__blsfill_u32(unsigned int a) { + // CHECK-LABEL: test__blsfill_u32 // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], -1 // CHECK-NEXT: {{.*}} = or i32 [[TMP]], [[SRC]] return __blsfill_u32(a); } unsigned long long test__blsfill_u64(unsigned long long a) { + // CHECK-LABEL: test__blsfill_u64 // CHECK: [[TMP:%.*]] = add i64 
[[SRC:%.*]], -1 // CHECK-NEXT: {{.*}} = or i64 [[TMP]], [[SRC]] return __blsfill_u64(a); } unsigned int test__blsic_u32(unsigned int a) { + // CHECK-LABEL: test__blsic_u32 // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC:%.*]], -1 // CHECK-NEXT: {{.*}} = or i32 [[TMP2]], [[TMP1]] @@ -105,6 +123,7 @@ unsigned int test__blsic_u32(unsigned in } unsigned long long test__blsic_u64(unsigned long long a) { + // CHECK-LABEL: test__blsic_u64 // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC:%.*]], -1 // CHECK-NEXT: {{.*}} = or i64 [[TMP2]], [[TMP1]] @@ -112,6 +131,7 @@ unsigned long long
r273003 - [X86][SSE4A] Use native IR for mask movntsd/movntss intrinsics.
Author: rksimon Date: Fri Jun 17 09:28:16 2016 New Revision: 273003 URL: http://llvm.org/viewvc/llvm-project?rev=273003=rev Log: [X86][SSE4A] Use native IR for mask movntsd/movntss intrinsics. Depends on llvm side commit r273002. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/sse4a-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=273003=273002=273003=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun 17 09:28:16 2016 @@ -6848,6 +6848,26 @@ Value *CodeGenFunction::EmitX86BuiltinEx SI->setAlignment(1); return SI; } + case X86::BI__builtin_ia32_movntsd: + case X86::BI__builtin_ia32_movntss: { +llvm::MDNode *Node = llvm::MDNode::get( +getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + +// Extract the 0'th element of the source vector. +Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); + +// Convert the type of the pointer to a pointer to the stored type. +Value *BC = Builder.CreateBitCast(Ops[0], +llvm::PointerType::getUnqual(Scl->getType()), + "cast"); + +// Unaligned nontemporal store of the scalar value. 
+StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); +SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); +SI->setAlignment(1); +return SI; + } + case X86::BI__builtin_ia32_selectb_128: case X86::BI__builtin_ia32_selectb_256: case X86::BI__builtin_ia32_selectb_512: Modified: cfe/trunk/test/CodeGen/sse4a-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse4a-builtins.c?rev=273003=273002=273003=diff == --- cfe/trunk/test/CodeGen/sse4a-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse4a-builtins.c Fri Jun 17 09:28:16 2016 @@ -33,12 +33,14 @@ __m128i test_mm_insert_si64(__m128i x, _ void test_mm_stream_sd(double *p, __m128d a) { // CHECK-LABEL: test_mm_stream_sd - // CHECK: call void @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> %{{[^,]+}}) - _mm_stream_sd(p, a); + // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK: store double %{{.*}}, double* %{{.*}}, align 1, !nontemporal + _mm_stream_sd(p, a); } void test_mm_stream_ss(float *p, __m128 a) { // CHECK-LABEL: test_mm_stream_ss - // CHECK: call void @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> %{{[^,]+}}) + // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK: store float %{{.*}}, float* %{{.*}}, align 1, !nontemporal _mm_stream_ss(p, a); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)
RKSimon added a comment. It seems like part of the need for this is because the _mm_cmp_ps style intrinsics are defined as macros (to get around the problem of trying to use an immediate as an argument): #define _mm_cmp_ps(a, b, c) __extension__ ({ \ (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (c)); }) which means clang can't use a __target__("avx") attribute to stop their use. Given that I'm happy with this patch's approach - anyone else have any suggestions? http://reviews.llvm.org/D21306 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21373: [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang.
RKSimon added a subscriber: RKSimon. RKSimon added a reviewer: RKSimon. Comment at: lib/Headers/bmiintrin.h:296 @@ -290,1 +295,3 @@ +} + #ifdef __x86_64__ Why not just #define to __tzcnt_u32 like the (many) other duplicate tzcnt intrinsics we have: ``` #define _mm_tzcnt_32(a) (__tzcnt_u32((a))) ``` Same for _mm_tzcnt_64 Also, please can you copy/paste/edit the doxygen comment so that its properly documented? http://reviews.llvm.org/D21373 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20358: [Clang][AVX512][Intrinsics]Convert AVX non-temporal store builtins to LLVM-native IR.
RKSimon added a comment. http://reviews.llvm.org/D21272 has now been committed, which I think removes the need for this patch. http://reviews.llvm.org/D20359 is still needed (with the additional tests requested by Craig). http://reviews.llvm.org/D20358 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r272541 - Fix unused variable warning
Author: rksimon Date: Mon Jun 13 05:05:19 2016 New Revision: 272541 URL: http://llvm.org/viewvc/llvm-project?rev=272541=rev Log: Fix unused variable warning Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=272541=272540=272541=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jun 13 05:05:19 2016 @@ -243,14 +243,14 @@ static Value *EmitSignBit(CodeGenFunctio // little-Endian, the high bits in big-Endian. Therefore, on big-Endian // we need to shift the high bits down to the low before truncating. Width >>= 1; -if (CGF.getTarget().isBigEndian()) { - Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); - V = CGF.Builder.CreateLShr(V, ShiftCst); -} -// We are truncating value in order to extract the higher-order -// double, which we will be using to extract the sign from. -IntTy = llvm::IntegerType::get(C, Width); -V = CGF.Builder.CreateTrunc(V, IntTy); +if (CGF.getTarget().isBigEndian()) { + Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); + V = CGF.Builder.CreateLShr(V, ShiftCst); +} +// We are truncating value in order to extract the higher-order +// double, which we will be using to extract the sign from. 
+IntTy = llvm::IntegerType::get(C, Width); +V = CGF.Builder.CreateTrunc(V, IntTy); } Value *Zero = llvm::Constant::getNullValue(IntTy); return CGF.Builder.CreateICmpSLT(V, Zero); @@ -1815,13 +1815,13 @@ RValue CodeGenFunction::EmitBuiltinExpr( case Builtin::BI__builtin_smull_overflow: case Builtin::BI__builtin_smulll_overflow: IntrinsicId = llvm::Intrinsic::smul_with_overflow; - break; -} - - -llvm::Value *Carry; -llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); -Builder.CreateStore(Sum, SumOutPtr); + break; +} + + +llvm::Value *Carry; +llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); +Builder.CreateStore(Sum, SumOutPtr); return RValue::get(Carry); } @@ -3569,13 +3569,13 @@ static Value *packTBLDVectorList(CodeGen llvm::Type *ResTy, unsigned IntID, const char *Name) { SmallVector TblOps; - if (ExtOp) -TblOps.push_back(ExtOp); - - // Build a vector containing sequential number like (0, 1, 2, ..., 15) - SmallVectorIndices; - llvm::VectorType *TblTy = cast(Ops[0]->getType()); - for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { + if (ExtOp) +TblOps.push_back(ExtOp); + + // Build a vector containing sequential number like (0, 1, 2, ..., 15) + SmallVector Indices; + llvm::VectorType *TblTy = cast(Ops[0]->getType()); + for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { Indices.push_back(2*i); Indices.push_back(2*i+1); } @@ -3596,13 +3596,13 @@ static Value *packTBLDVectorList(CodeGen ZeroTbl, Indices, Name)); } - Function *TblF; - TblOps.push_back(IndexOp); - TblF = CGF.CGM.getIntrinsic(IntID, ResTy); - - return CGF.EmitNeonCall(TblF, TblOps, Name); -} - + Function *TblF; + TblOps.push_back(IndexOp); + TblF = CGF.CGM.getIntrinsic(IntID, ResTy); + + return CGF.EmitNeonCall(TblF, TblOps, Name); +} + Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { unsigned Value; switch (BuiltinID) { @@ -4102,13 +4102,13 @@ Value *CodeGenFunction::EmitARMBuiltinEx "vsha1h"); // The ARM 
_MoveToCoprocessor builtins put the input register value as - // the first argument, but the LLVM intrinsic expects it as the third one. - case ARM::BI_MoveToCoprocessor: - case ARM::BI_MoveToCoprocessor2: { -Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? - Intrinsic::arm_mcr : Intrinsic::arm_mcr2); -return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], - Ops[3], Ops[4], Ops[5]}); + // the first argument, but the LLVM intrinsic expects it as the third one. + case ARM::BI_MoveToCoprocessor: + case ARM::BI_MoveToCoprocessor2: { +Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? + Intrinsic::arm_mcr : Intrinsic::arm_mcr2); +return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], + Ops[3], Ops[4], Ops[5]}); } } @@ -6701,27 +6701,26 @@ Value *CodeGenFunction::EmitX86BuiltinEx if (Ops.size() == 3) return Align; -return EmitX86Select(*this, Ops[4], Align, Ops[3]); - } - - case X86::BI__builtin_ia32_movnti: - case X86::BI__builtin_ia32_movnti64: { -llvm::MDNode
r272540 - [Clang][X86] Convert non-temporal store builtins to generic __builtin_nontemporal_store in headers
Author: rksimon Date: Mon Jun 13 04:57:52 2016 New Revision: 272540 URL: http://llvm.org/viewvc/llvm-project?rev=272540=rev Log: [Clang][X86] Convert non-temporal store builtins to generic __builtin_nontemporal_store in headers We can now use __builtin_nontemporal_store instead of target specific builtins for naturally aligned nontemporal stores which avoids the need for handling in CGBuiltin.cpp The scalar integer nontemporal (unaligned) store builtins will have to wait as __builtin_nontemporal_store currently assumes natural alignment and doesn't accept the 'packed struct' trick that we use for normal unaligned load/stores. The nontemporal loads require further backend support before we can safely convert them to __builtin_nontemporal_load Differential Revision: http://reviews.llvm.org/D21272 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=272540=272539=272540=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon Jun 13 04:57:52 2016 @@ -313,7 +313,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtss2si64 TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse") -TARGET_BUILTIN(__builtin_ia32_movntps, "vf*V4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_sfence, "v", "", "sse") TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "", "sse") TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "", "sse") @@ -327,8 +326,6 @@ TARGET_BUILTIN(__builtin_ia32_movmskpd, 
TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "", "sse2") TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "", "sse2") TARGET_BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_movntpd, "vd*V2d", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_movntdq, "vV2LLi*V2LLi", "", "sse2") TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "", "sse2") @@ -493,9 +490,6 @@ TARGET_BUILTIN(__builtin_ia32_vzeroupper TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "", "avx") TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "", "avx") TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "", "avx") -TARGET_BUILTIN(__builtin_ia32_movntdq256, "vV4LLi*V4LLi", "", "avx") -TARGET_BUILTIN(__builtin_ia32_movntpd256, "vd*V4d", "", "avx") -TARGET_BUILTIN(__builtin_ia32_movntps256, "vf*V8f", "", "avx") TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "", "avx") TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "", "avx") TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "", "avx") @@ -2154,10 +2148,7 @@ TARGET_BUILTIN(__builtin_ia32_kortestzhi TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movntdq512, "vV8LLi*V8LLi","","avx512f") TARGET_BUILTIN(__builtin_ia32_movntdqa512, "V8LLiV8LLi*","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movntpd512, "vd*V8d","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movntps512, "vf*V16f","","avx512f") TARGET_BUILTIN(__builtin_ia32_palignr512_mask, "V64cV64cV64cIiV64cULLi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_palignr128_mask, "V16cV16cV16cIiV16cUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_palignr256_mask, "V32cV32cV32cIiV32cUi","","avx512bw,avx512vl") Modified: 
cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=272540=272539=272540=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jun 13 04:57:52 2016 @@ -243,14 +243,14 @@ static Value *EmitSignBit(CodeGenFunctio // little-Endian, the high bits in big-Endian. Therefore, on big-Endian // we need to shift the high bits down to the low before truncating. Width >>= 1; -if (CGF.getTarget().isBigEndian()) { - Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); - V = CGF.Builder.CreateLShr(V, ShiftCst); -} -// We are truncating value in order to extract the higher-order -// double, which we will be using to extract the sign from. -IntTy =
Re: [PATCH] D20358: [Clang][AVX512][Intrinsics]Convert AVX non-temporal store builtins to LLVM-native IR.
RKSimon added a comment. In http://reviews.llvm.org/D20358#446241, @RKSimon wrote: > In http://reviews.llvm.org/D20358#446220, @ab wrote: > > > In http://reviews.llvm.org/D20358#446218, @ab wrote: > > > > > In http://reviews.llvm.org/D20358#446210, @RKSimon wrote: > > > > > > > Is there any reason why we can't just get rid of all the SSE movnt > > > > builtins and use __builtin_nontemporal_store instead > > > > (http://reviews.llvm.org/D12313)? > > > > > > > > > I wanted to suggest that too, but I think you'd have problems with the > > > (natural?) alignment requirement of __builtin_nontemporal_store (whereas > > > IIRC, movnti & friends accept unaligned pointers). > > > > > > But now that I look at this again, I suppose we could have some > > __attribute__((aligned(1))), or something like r271214. > > > True, luckily that only affects _mm_stream_si32 and _mm_stream_si64 - the > 'real' vector movnt stores all require type alignment. The _mm_stream_load_* > (movntdqa) loads cases should be trivial as well. I've created http://reviews.llvm.org/D21272 that covers the conversion of SSE/SSE2/AVX/AVX512 non-temporal aligned vector stores to use __builtin_nontemporal_store in headers http://reviews.llvm.org/D20358 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21272: [Clang][X86] Convert non-temporal store builtins to generic __builtin_nontemporal_store in headers
RKSimon created this revision. RKSimon added reviewers: craig.topper, ab, spatel, andreadb. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. As discussed on D20358, we can now use __builtin_nontemporal_store instead of target specific builtins for naturally aligned nontemporal stores which avoids the need for handling in CGBuiltin.cpp The scalar integer nontemporal (unaligned) store builtins will have to wait as __builtin_nontemporal_store currently assumes natural alignment and doesn't accept the 'packed struct' trick that we use for normal unaligned load/stores. NOTE: The nontemporal loads require further backend support before we can safely convert them to __builtin_nontemporal_load Repository: rL LLVM http://reviews.llvm.org/D21272 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/avx512fintrin.h lib/Headers/avxintrin.h lib/Headers/emmintrin.h lib/Headers/xmmintrin.h test/CodeGen/avx512f-builtins.c test/CodeGen/builtins-x86.c Index: test/CodeGen/builtins-x86.c === --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -300,7 +300,6 @@ (void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f); tmp_i = __builtin_ia32_movmskps(tmp_V4f); tmp_i = __builtin_ia32_pmovmskb(tmp_V8c); - (void) __builtin_ia32_movntps(tmp_fp, tmp_V4f); (void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi); (void) __builtin_ia32_sfence(); @@ -318,8 +317,6 @@ #ifdef USE_64 (void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi); #endif - (void) __builtin_ia32_movntpd(tmp_dp, tmp_V2d); - (void) __builtin_ia32_movntdq(tmp_V2LLip, tmp_V2LLi); tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d); tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d); @@ -446,9 +443,6 @@ tmp_V4d = __builtin_ia32_vbroadcastf128_pd256(tmp_V2dCp); tmp_V8f = __builtin_ia32_vbroadcastf128_ps256(tmp_V4fCp); tmp_V32c = __builtin_ia32_lddqu256(tmp_cCp); - __builtin_ia32_movntdq256(tmp_V4LLip, tmp_V4LLi); - 
__builtin_ia32_movntpd256(tmp_dp, tmp_V4d); - __builtin_ia32_movntps256(tmp_fp, tmp_V8f); tmp_V2d = __builtin_ia32_maskloadpd(tmp_V2dCp, tmp_V2LLi); tmp_V4f = __builtin_ia32_maskloadps(tmp_V4fCp, tmp_V4i); tmp_V4d = __builtin_ia32_maskloadpd256(tmp_V4dCp, tmp_V4LLi); Index: test/CodeGen/avx512f-builtins.c === --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -5800,7 +5800,7 @@ void test_mm512_stream_si512(__m512i * __P, __m512i __A) { // CHECK-LABEL: @test_mm512_stream_si512 - // CHECK: @llvm.x86.avx512.storent.q.512 + // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 64, !nontemporal _mm512_stream_si512(__P, __A); } @@ -5812,13 +5812,13 @@ void test_mm512_stream_pd(double *__P, __m512d __A) { // CHECK-LABEL: @test_mm512_stream_pd - // CHECK: @llvm.x86.avx512.storent.pd.512 + // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, !nontemporal return _mm512_stream_pd(__P, __A); } void test_mm512_stream_ps(float *__P, __m512 __A) { // CHECK-LABEL: @test_mm512_stream_ps - // CHECK: @llvm.x86.avx512.storent.ps.512 + // CHECK: store <16 x float> %{{.*}}, <16 x float>* %{{.*}}, align 64, !nontemporal _mm512_stream_ps(__P, __A); } Index: lib/Headers/xmmintrin.h === --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -2080,7 +2080,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ps(float *__p, __m128 __a) { - __builtin_ia32_movntps(__p, (__v4sf)__a); + __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p); } /// \brief Forces strong memory ordering (serialization) between store Index: lib/Headers/emmintrin.h === --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -2210,13 +2210,13 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a) { - __builtin_ia32_movntpd(__p, (__v2df)__a); + __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p); } static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a) { - __builtin_ia32_movntdq(__p, (__v2di)__a); 
+ __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p); } static __inline__ void __DEFAULT_FN_ATTRS Index: lib/Headers/avxintrin.h === --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -2496,19 +2496,19 @@ static __inline void __DEFAULT_FN_ATTRS _mm256_stream_si256(__m256i *__a, __m256i __b) { - __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b); + __builtin_nontemporal_store((__v4di)__b, (__v4di*)__a); } static __inline void __DEFAULT_FN_ATTRS _mm256_stream_pd(double *__a, __m256d __b) { -
Re: [PATCH] D21268: [x86] translate SSE packed FP comparison builtins to IR
RKSimon added a comment. Eeep that's certainly a lot more work than just adding a few extra cases! Please add a TODO explaining what we need to do? If there is a problem with the header documentation please can you raise a bugzilla and CC Katya Romanova. http://reviews.llvm.org/D21268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21268: [x86] translate SSE packed FP comparison builtins to IR
RKSimon added a comment. Is there any reason that we shouldn't include the avxintrin.h __builtin_ia32_cmppd/__builtin_ia32_cmpps/__builtin_ia32_cmppd256/__builtin_ia32_cmpps256 packed intrinsics in this CGBuiltin.cpp patch? Since we're heading towards nixing them anyhow. http://reviews.llvm.org/D21268 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32
RKSimon accepted this revision. RKSimon added a comment. This revision is now accepted and ready to land. LGTM - add test_mm512_setzero_pd() as well if you can. Comment at: test/CodeGen/avx512f-builtins.c:7291 @@ +7290,3 @@ + +__m512i test_mm512_setzero_ps() +{ __m512d test_mm512_setzero_pd() ? http://reviews.llvm.org/D20871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32
RKSimon added a comment. Can you add tests for the existing _mm512_setzero_* intrinsics as well please? http://reviews.llvm.org/D20871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20358: [Clang][AVX512][Intrinsics]Convert AVX non-temporal store builtins to LLVM-native IR.
RKSimon added a comment. In http://reviews.llvm.org/D20358#446220, @ab wrote: > In http://reviews.llvm.org/D20358#446218, @ab wrote: > > > In http://reviews.llvm.org/D20358#446210, @RKSimon wrote: > > > > > Is there any reason why we can't just get rid of all the SSE movnt > > > builtins and use __builtin_nontemporal_store instead > > > (http://reviews.llvm.org/D12313)? > > > > > > I wanted to suggest that too, but I think you'd have problems with the > > (natural?) alignment requirement of __builtin_nontemporal_store (whereas > > IIRC, movnti & friends accept unaligned pointers). > > > But now that I look at this again, I suppose we could have some > __attribute__((aligned(1))), or something like r271214. True, luckily that only affects _mm_stream_si32 and _mm_stream_si64 - the 'real' vector movnt stores all require type alignment. The _mm_stream_load_* (movntdqa) loads cases should be trivial as well. http://reviews.llvm.org/D20358 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20358: [Clang][AVX512][Intrinsics]Convert AVX non-temporal store builtins to LLVM-native IR.
RKSimon added a subscriber: RKSimon. RKSimon added a comment. Is there any reason why we can't just get rid of all the SSE movnt builtins and use __builtin_nontemporal_store instead (http://reviews.llvm.org/D12313)? http://reviews.llvm.org/D20358 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32
RKSimon added a subscriber: RKSimon. RKSimon added a comment. Tests? http://reviews.llvm.org/D20871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r271436 - [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)
Author: rksimon Date: Wed Jun 1 16:46:51 2016 New Revision: 271436 URL: http://llvm.org/viewvc/llvm-project?rev=271436=rev Log: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang) The 'cvtt' truncation (round to zero) conversions can be safely represented as generic __builtin_convertvector (fptosi) calls instead of x86 intrinsics. We already do this (implicitly) for the scalar equivalents. Note: I looked at updating _mm_cvttpd_epi32 as well but this still requires a lot more backend work to correctly lower (both for debug and optimized builds). Differential Revision: http://reviews.llvm.org/D20859 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=271436=271435=271436=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jun 1 16:46:51 2016 @@ -339,7 +339,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2") @@ -462,9 +461,7 @@ TARGET_BUILTIN(__builtin_ia32_cmpps256, TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, 
"V4iV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx") Modified: cfe/trunk/lib/Headers/avxintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=271436=271435=271436=diff == --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Wed Jun 1 16:46:51 2016 @@ -2108,7 +2108,7 @@ _mm256_cvtps_pd(__m128 __a) static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { - return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); + return (__m128i)__builtin_convertvector((__v4df) __a, __v4si); } static __inline __m128i __DEFAULT_FN_ATTRS @@ -2120,7 +2120,7 @@ _mm256_cvtpd_epi32(__m256d __a) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { - return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); + return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si); } static __inline double __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=271436=271435=271436=diff == --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Wed Jun 1 16:46:51 2016 @@ -1744,7 +1744,7 @@ _mm_cvtps_epi32(__m128 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { - return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); + return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si); } /// \brief Returns a vector of [4 x i32] where the lowest element is the input Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=271436=271435=271436=diff == --- 
cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Wed Jun 1 16:46:51 2016 @@ -286,13 +286,13 @@ __m256d test_mm256_cvtps_pd(__m128 A) { __m128i test_mm256_cvttpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvttpd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}}) + // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32> return _mm256_cvttpd_epi32(A); } __m256i test_mm256_cvttps_epi32(__m256 A) { // CHECK-LABEL: test_mm256_cvttps_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}}) + // CHECK:
Re: [PATCH] D20859: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)
This revision was automatically updated to reflect the committed changes. Closed by commit rL271436: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero)… (authored by RKSimon). Changed prior to commit: http://reviews.llvm.org/D20859?vs=59204=59284#toc Repository: rL LLVM http://reviews.llvm.org/D20859 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse2-builtins.c Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def === --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -339,7 +339,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2") @@ -462,9 +461,7 @@ TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx") Index: cfe/trunk/test/CodeGen/avx-builtins.c === --- cfe/trunk/test/CodeGen/avx-builtins.c +++ cfe/trunk/test/CodeGen/avx-builtins.c @@ -286,13 +286,13 @@ __m128i test_mm256_cvttpd_epi32(__m256d A) { // 
CHECK-LABEL: test_mm256_cvttpd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}}) + // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32> return _mm256_cvttpd_epi32(A); } __m256i test_mm256_cvttps_epi32(__m256 A) { // CHECK-LABEL: test_mm256_cvttps_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}}) + // CHECK: fptosi <8 x float> %{{.*}} to <8 x i32> return _mm256_cvttps_epi32(A); } Index: cfe/trunk/test/CodeGen/builtins-x86.c === --- cfe/trunk/test/CodeGen/builtins-x86.c +++ cfe/trunk/test/CodeGen/builtins-x86.c @@ -335,7 +335,6 @@ tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); - tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); (void) __builtin_ia32_mfence(); @@ -415,9 +414,7 @@ tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); - tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); - tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7); tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7); tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7); Index: cfe/trunk/test/CodeGen/sse2-builtins.c === --- cfe/trunk/test/CodeGen/sse2-builtins.c +++ cfe/trunk/test/CodeGen/sse2-builtins.c @@ -533,7 +533,7 @@ __m128i test_mm_cvttps_epi32(__m128 A) { // CHECK-LABEL: test_mm_cvttps_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}}) + // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32> return _mm_cvttps_epi32(A); } Index: cfe/trunk/lib/Headers/avxintrin.h === --- cfe/trunk/lib/Headers/avxintrin.h +++ cfe/trunk/lib/Headers/avxintrin.h @@ -2108,7 +2108,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { - return 
(__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); + return (__m128i)__builtin_convertvector((__v4df) __a, __v4si); } static __inline __m128i __DEFAULT_FN_ATTRS @@ -2120,7 +2120,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { - return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); + return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si); } static __inline double __DEFAULT_FN_ATTRS Index: cfe/trunk/lib/Headers/emmintrin.h === --- cfe/trunk/lib/Headers/emmintrin.h +++
[PATCH] D20859: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)
RKSimon created this revision. RKSimon added reviewers: ab, mkuper, craig.topper, spatel, andreadb. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. The 'cvtt' truncation (round to zero) conversions can be safely represented as generic __builtin_convertvector (fptosi) calls instead of x86 intrinsics. We already do this (implicitly) for the scalar equivalents. Note: I looked at updating _mm_cvttpd_epi32 as well but this still requires a lot more backend work to correctly lower (both for debug and optimized builds). Repository: rL LLVM http://reviews.llvm.org/D20859 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avxintrin.h lib/Headers/emmintrin.h test/CodeGen/avx-builtins.c test/CodeGen/builtins-x86.c test/CodeGen/sse2-builtins.c Index: test/CodeGen/sse2-builtins.c === --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -533,7 +533,7 @@ __m128i test_mm_cvttps_epi32(__m128 A) { // CHECK-LABEL: test_mm_cvttps_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}}) + // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32> return _mm_cvttps_epi32(A); } Index: test/CodeGen/builtins-x86.c === --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -335,7 +335,6 @@ tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); - tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); (void) __builtin_ia32_mfence(); @@ -415,9 +414,7 @@ tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); - tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); - tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7); tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7); tmp_V8i = 
__builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7); Index: test/CodeGen/avx-builtins.c === --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -286,13 +286,13 @@ __m128i test_mm256_cvttpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvttpd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}}) + // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32> return _mm256_cvttpd_epi32(A); } __m256i test_mm256_cvttps_epi32(__m256 A) { // CHECK-LABEL: test_mm256_cvttps_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}}) + // CHECK: fptosi <8 x float> %{{.*}} to <8 x i32> return _mm256_cvttps_epi32(A); } Index: lib/Headers/emmintrin.h === --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -1744,7 +1744,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { - return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); + return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si); } /// \brief Returns a vector of [4 x i32] where the lowest element is the input Index: lib/Headers/avxintrin.h === --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -2108,7 +2108,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { - return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); + return (__m128i)__builtin_convertvector((__v4df) __a, __v4si); } static __inline __m128i __DEFAULT_FN_ATTRS @@ -2120,7 +2120,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { - return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); + return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si); } /* Vector replicate */ Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -335,7 +335,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", 
"", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2") @@ -458,9 +457,7 @@ TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
r271219 - [X86][SSE] Added missing tests (merge failure)
Author: rksimon Date: Mon May 30 12:58:38 2016 New Revision: 271219 URL: http://llvm.org/viewvc/llvm-project?rev=271219&view=rev Log: [X86][SSE] Added missing tests (merge failure) Differential Revision: http://reviews.llvm.org/D20617 Modified: cfe/trunk/test/CodeGen/sse-builtins.c Modified: cfe/trunk/test/CodeGen/sse-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=271219&r1=271218&r2=271219&view=diff == --- cfe/trunk/test/CodeGen/sse-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse-builtins.c Mon May 30 12:58:38 2016 @@ -651,8 +651,7 @@ void test_mm_store_ps(float* x, __m128 y void test_mm_store_ps1(float* x, __m128 y) { // CHECK-LABEL: test_mm_store_ps1 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} - // CHECK-NEXT: ret void + // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16 _mm_store_ps1(x, y); } @@ -666,8 +665,7 @@ void test_mm_store_ss(float* x, __m128 y void test_mm_store1_ps(float* x, __m128 y) { // CHECK-LABEL: test_mm_store1_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} - // CHECK-NEXT: ret void + // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16 _mm_store1_ps(x, y); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
This revision was automatically updated to reflect the committed changes. Closed by commit rL271218: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer (authored by RKSimon). Changed prior to commit: http://reviews.llvm.org/D20617?vs=58397=58979#toc Repository: rL LLVM http://reviews.llvm.org/D20617 Files: cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/test/CodeGen/sse2-builtins.c Index: cfe/trunk/test/CodeGen/sse2-builtins.c === --- cfe/trunk/test/CodeGen/sse2-builtins.c +++ cfe/trunk/test/CodeGen/sse2-builtins.c @@ -1205,6 +1205,13 @@ _mm_store_pd(A, B); } +void test_mm_store_pd1(double* x, __m128d y) { + // CHECK-LABEL: test_mm_store_pd1 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 + _mm_store_pd1(x, y); +} + void test_mm_store_sd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -1220,9 +1227,8 @@ void test_mm_store1_pd(double* x, __m128d y) { // CHECK-LABEL: test_mm_store1_pd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 _mm_store1_pd(x, y); } Index: cfe/trunk/lib/Headers/emmintrin.h === --- cfe/trunk/lib/Headers/emmintrin.h +++ cfe/trunk/lib/Headers/emmintrin.h @@ -588,19 +588,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ + *(__m128d*)__dp = __a; +} + +static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { - struct __mm_store1_pd_struct { -double __u[2]; - } __attribute__((__packed__, __may_alias__)); - ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; - ((struct 
__mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; + __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); + _mm_store_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { - *(__m128d *)__dp = __a; + return _mm_store1_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS Index: cfe/trunk/lib/Headers/xmmintrin.h === --- cfe/trunk/lib/Headers/xmmintrin.h +++ cfe/trunk/lib/Headers/xmmintrin.h @@ -1593,22 +1593,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store1_ps(float *__p, __m128 __a) +_mm_store_ps(float *__p, __m128 __a) { - __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); - _mm_storeu_ps(__p, __a); + *(__m128*)__p = __a; } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps1(float *__p, __m128 __a) +_mm_store1_ps(float *__p, __m128 __a) { -return _mm_store1_ps(__p, __a); + __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); + _mm_store_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps(float *__p, __m128 __a) +_mm_store_ps1(float *__p, __m128 __a) { - *(__m128 *)__p = __a; + return _mm_store1_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS Index: cfe/trunk/test/CodeGen/sse2-builtins.c === --- cfe/trunk/test/CodeGen/sse2-builtins.c +++ cfe/trunk/test/CodeGen/sse2-builtins.c @@ -1205,6 +1205,13 @@ _mm_store_pd(A, B); } +void test_mm_store_pd1(double* x, __m128d y) { + // CHECK-LABEL: test_mm_store_pd1 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 + _mm_store_pd1(x, y); +} + void test_mm_store_sd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -1220,9 +1227,8 @@ void test_mm_store1_pd(double* x, __m128d y) { // CHECK-LABEL: test_mm_store1_pd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // 
CHECK: store {{.*}} double* {{.*}}, align 1{{$}} - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 _mm_store1_pd(x, y); } Index: cfe/trunk/lib/Headers/emmintrin.h === --- cfe/trunk/lib/Headers/emmintrin.h +++ cfe/trunk/lib/Headers/emmintrin.h @@ -588,19 +588,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ +
r271218 - [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
Author: rksimon Date: Mon May 30 12:55:25 2016 New Revision: 271218 URL: http://llvm.org/viewvc/llvm-project?rev=271218=rev Log: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer According to the gcc headers, intel intrinsics docs and msdn codegen the _mm_store1_pd (and its _mm_store_pd1 equivalent) should use an aligned pointer - the clang headers are the only implementation I can find that assume non-aligned stores (by storing with _mm_storeu_pd). Additionally, according to the intel intrinsics docs and msdn codegen the _mm_store1_ps (_mm_store_ps1) requires a similarly aligned pointer. This patch raises the alignment requirements to match the other implementations by calling _mm_store_ps/_mm_store_pd instead. I've also added the missing _mm_store_pd1 intrinsic (which maps to _mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps). As a followup I'll update the llvm fast-isel tests to match this codegen. Differential Revision: http://reviews.llvm.org/D20617 Modified: cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=271218=271217=271218=diff == --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Mon May 30 12:55:25 2016 @@ -588,19 +588,22 @@ _mm_store_sd(double *__dp, __m128d __a) } static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ + *(__m128d*)__dp = __a; +} + +static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { - struct __mm_store1_pd_struct { -double __u[2]; - } __attribute__((__packed__, __may_alias__)); - ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; - ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; + __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); + _mm_store_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS 
-_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { - *(__m128d *)__dp = __a; + return _mm_store1_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/xmmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=271218=271217=271218=diff == --- cfe/trunk/lib/Headers/xmmintrin.h (original) +++ cfe/trunk/lib/Headers/xmmintrin.h Mon May 30 12:55:25 2016 @@ -1593,22 +1593,22 @@ _mm_storeu_ps(float *__p, __m128 __a) } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store1_ps(float *__p, __m128 __a) +_mm_store_ps(float *__p, __m128 __a) { - __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); - _mm_storeu_ps(__p, __a); + *(__m128*)__p = __a; } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps1(float *__p, __m128 __a) +_mm_store1_ps(float *__p, __m128 __a) { -return _mm_store1_ps(__p, __a); + __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); + _mm_store_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps(float *__p, __m128 __a) +_mm_store_ps1(float *__p, __m128 __a) { - *(__m128 *)__p = __a; + return _mm_store1_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS Modified: cfe/trunk/test/CodeGen/sse2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=271218=271217=271218=diff == --- cfe/trunk/test/CodeGen/sse2-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse2-builtins.c Mon May 30 12:55:25 2016 @@ -1205,6 +1205,13 @@ void test_mm_store_pd(double* A, __m128d _mm_store_pd(A, B); } +void test_mm_store_pd1(double* x, __m128d y) { + // CHECK-LABEL: test_mm_store_pd1 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 + _mm_store_pd1(x, y); +} + void test_mm_store_sd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_sd // CHECK: extractelement <2 x 
double> %{{.*}}, i32 0 @@ -1220,9 +1227,8 @@ void test_mm_store_si128(__m128i* A, __m void test_mm_store1_pd(double* x, __m128d y) { // CHECK-LABEL: test_mm_store1_pd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 _mm_store1_pd(x, y); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org
r271187 - [X86][SSE] Make unsigned integer vector types generally available
Author: rksimon Date: Sun May 29 13:49:08 2016 New Revision: 271187 URL: http://llvm.org/viewvc/llvm-project?rev=271187=rev Log: [X86][SSE] Make unsigned integer vector types generally available As discussed on http://reviews.llvm.org/D20684, move the unsigned integer vector types used for zero extension to make them available for general use. Modified: cfe/trunk/lib/Headers/avx2intrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/smmintrin.h Modified: cfe/trunk/lib/Headers/avx2intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=271187=271186=271187=diff == --- cfe/trunk/lib/Headers/avx2intrin.h (original) +++ cfe/trunk/lib/Headers/avx2intrin.h Sun May 29 13:49:08 2016 @@ -402,42 +402,36 @@ _mm256_cvtepi32_epi64(__m128i __V) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi16(__m128i __V) { - typedef unsigned char __v16qu __attribute__((__vector_size__(16))); return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi32(__m128i __V) { - typedef unsigned char __v16qu __attribute__((__vector_size__(16))); return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi64(__m128i __V) { - typedef unsigned char __v16qu __attribute__((__vector_size__(16))); return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi32(__m128i __V) { - typedef unsigned short __v8hu __attribute__((__vector_size__(16))); return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi64(__m128i __V) { - typedef unsigned short __v8hu __attribute__((__vector_size__(16))); return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, 
(__v8hu)__V, 0, 1, 2, 3), __v4di); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu32_epi64(__m128i __V) { - typedef unsigned int __v4su __attribute__((__vector_size__(16))); return (__m256i)__builtin_convertvector((__v4su)__V, __v4di); } Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=271187=271186=271187=diff == --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Sun May 29 13:49:08 2016 @@ -35,6 +35,12 @@ typedef long long __v2di __attribute__ ( typedef short __v8hi __attribute__((__vector_size__(16))); typedef char __v16qi __attribute__((__vector_size__(16))); +/* Unsigned types */ +typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16))); +typedef unsigned int __v4su __attribute__((__vector_size__(16))); +typedef unsigned short __v8hu __attribute__((__vector_size__(16))); +typedef unsigned char __v16qu __attribute__((__vector_size__(16))); + /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. 
*/ typedef signed char __v16qs __attribute__((__vector_size__(16))); Modified: cfe/trunk/lib/Headers/smmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=271187=271186=271187=diff == --- cfe/trunk/lib/Headers/smmintrin.h (original) +++ cfe/trunk/lib/Headers/smmintrin.h Sun May 29 13:49:08 2016 @@ -324,42 +324,36 @@ _mm_cvtepi32_epi64(__m128i __V) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { - typedef unsigned char __v16qu __attribute__((__vector_size__(16))); return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { - typedef unsigned char __v16qu __attribute__((__vector_size__(16))); return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { - typedef unsigned char __v16qu __attribute__((__vector_size__(16))); return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { - typedef unsigned short __v8hu __attribute__((__vector_size__(16))); return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) {
Re: [PATCH] D20359: [LLVM][AVX512][Intrinsics] Convert AVX non-temporal store builtins to LLVM-native IR.
RKSimon added a subscriber: RKSimon. RKSimon added a comment. Should AVX512 store support (non-temporal or otherwise) be added to X86FastISel::X86FastEmitStore? http://reviews.llvm.org/D20359 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20684: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)
This revision was automatically updated to reflect the committed changes. Closed by commit rL271106: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with… (authored by RKSimon). Changed prior to commit: http://reviews.llvm.org/D20684?vs=58626=58884#toc Repository: rL LLVM http://reviews.llvm.org/D20684 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx2intrin.h cfe/trunk/lib/Headers/smmintrin.h cfe/trunk/test/CodeGen/avx2-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse41-builtins.c Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def === --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -382,12 +382,6 @@ TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxbd128, "V4iV16c", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxbq128, "V2LLiV16c", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxbw128, "V8sV16c", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxdq128, "V2LLiV4i", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxwd128, "V4iV8s", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxwq128, "V2LLiV8s", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pmulld128, "V4iV4iV4i", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "", "sse4.1") @@ -558,18 +552,6 @@ TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxbw256, "V16sV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxbd256, "V8iV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxbq256, "V4LLiV16c", "", "avx2") 
-TARGET_BUILTIN(__builtin_ia32_pmovsxwd256, "V8iV8s", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxwq256, "V4LLiV8s", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxdq256, "V4LLiV4i", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxbw256, "V16sV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxbd256, "V8iV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxbq256, "V4LLiV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxwd256, "V8iV8s", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxwq256, "V4LLiV8s", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxdq256, "V4LLiV4i", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "", "avx2") Index: cfe/trunk/test/CodeGen/sse41-builtins.c === --- cfe/trunk/test/CodeGen/sse41-builtins.c +++ cfe/trunk/test/CodeGen/sse41-builtins.c @@ -119,37 +119,43 @@ __m128i test_mm_cvtepu8_epi16(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi16 - // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> + // CHECK: zext <8 x i8> {{.*}} to <8 x i16> return _mm_cvtepu8_epi16(a); } __m128i test_mm_cvtepu8_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> + // CHECK: zext <4 x i8> {{.*}} to <4 x i32> return _mm_cvtepu8_epi32(a); } __m128i test_mm_cvtepu8_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> + // CHECK: zext <2 x i8> {{.*}} to <2 x i64> return _mm_cvtepu8_epi64(a); } __m128i test_mm_cvtepu16_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepu16_epi32 - // CHECK: call <4 x i32> 
@llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}}) + // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> + // CHECK: zext <4 x i16> {{.*}} to <4 x i32> return _mm_cvtepu16_epi32(a); } __m128i test_mm_cvtepu16_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu16_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}}) + // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> + // CHECK: zext <2 x i16> {{.*}} to <2 x i64> return _mm_cvtepu16_epi64(a); } __m128i test_mm_cvtepu32_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu32_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}}) + // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> + // CHECK: zext <2 x i32> {{.*}} to <2 x i64>
r271106 - [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)
Author: rksimon Date: Sat May 28 03:12:45 2016 New Revision: 271106 URL: http://llvm.org/viewvc/llvm-project?rev=271106=rev Log: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang) The VPMOVSX and (V)PMOVZX sign/zero extension intrinsics can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics. This patch removes the clang builtins and their use in the sse2/avx headers - a companion patch will remove/auto-upgrade the llvm intrinsics. Note: We already did this for SSE41 PMOVSX sometime ago. Differential Revision: http://reviews.llvm.org/D20684 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx2intrin.h cfe/trunk/lib/Headers/smmintrin.h cfe/trunk/test/CodeGen/avx2-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse41-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=271106=271105=271106=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat May 28 03:12:45 2016 @@ -382,12 +382,6 @@ TARGET_BUILTIN(__builtin_ia32_pminsb128, TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxbd128, "V4iV16c", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxbq128, "V2LLiV16c", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxbw128, "V8sV16c", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxdq128, "V2LLiV4i", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxwd128, "V4iV8s", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmovzxwq128, "V2LLiV8s", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pmulld128, "V4iV4iV4i", "", 
"sse4.1") TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "", "sse4.1") @@ -558,18 +552,6 @@ TARGET_BUILTIN(__builtin_ia32_pminsb256, TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxbw256, "V16sV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxbd256, "V8iV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxbq256, "V4LLiV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxwd256, "V8iV8s", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxwq256, "V4LLiV8s", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovsxdq256, "V4LLiV4i", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxbw256, "V16sV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxbd256, "V8iV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxbq256, "V4LLiV16c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxwd256, "V8iV8s", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxwq256, "V4LLiV8s", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovzxdq256, "V4LLiV4i", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "", "avx2") Modified: cfe/trunk/lib/Headers/avx2intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=271106=271105=271106=diff == --- cfe/trunk/lib/Headers/avx2intrin.h (original) +++ cfe/trunk/lib/Headers/avx2intrin.h Sat May 28 03:12:45 2016 @@ -360,73 +360,85 @@ _mm256_movemask_epi8(__m256i __a) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi16(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. 
*/ + return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi32(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. */ + return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi64(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. */ + return
Re: [PATCH] D20684: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)
RKSimon added a comment. In http://reviews.llvm.org/D20684#442514, @ab wrote: > I'd add the unsigned typedefs with their signed counterparts; no reason not > to. > With that, LGTM. Thanks, I'll do that as a follow-up commit. Repository: rL LLVM http://reviews.llvm.org/D20684 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
RKSimon added inline comments. Comment at: lib/Headers/emmintrin.h:598 @@ -594,3 +597,3 @@ static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { majnemer wrote: > You could use `__attribute__((align_value(16)))` no? Technically yes but AFAICT there are no other users of this approach in the headers - is it something that we should be encouraging do you think? Craig - I think you wrote in a commit about dropping the unaligned intrinsics, is that how you'd do it? Repository: rL LLVM http://reviews.llvm.org/D20617 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20684: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)
RKSimon created this revision. RKSimon added reviewers: mkuper, craig.topper, spatel, andreadb. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. The VPMOVSX and (V)PMOVZX sign/zero extension intrinsics can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics. This patch removes the clang builtins and their use in the sse2/avx headers - a companion patch will remove/auto-upgrade the llvm intrinsics. Note: We already did this for SSE41 PMOVSX sometime ago. Repository: rL LLVM http://reviews.llvm.org/D20684 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx2intrin.h lib/Headers/smmintrin.h test/CodeGen/avx2-builtins.c test/CodeGen/builtins-x86.c test/CodeGen/sse41-builtins.c Index: test/CodeGen/sse41-builtins.c === --- test/CodeGen/sse41-builtins.c +++ test/CodeGen/sse41-builtins.c @@ -119,37 +119,43 @@ __m128i test_mm_cvtepu8_epi16(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi16 - // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> + // CHECK: zext <8 x i8> {{.*}} to <8 x i16> return _mm_cvtepu8_epi16(a); } __m128i test_mm_cvtepu8_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> + // CHECK: zext <4 x i8> {{.*}} to <4 x i32> return _mm_cvtepu8_epi32(a); } __m128i test_mm_cvtepu8_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu8_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}}) + // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> + // CHECK: zext <2 x i8> {{.*}} to <2 x i64> return _mm_cvtepu8_epi64(a); } __m128i test_mm_cvtepu16_epi32(__m128i a) { // CHECK-LABEL: test_mm_cvtepu16_epi32 - // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}}) + // CHECK: shufflevector <8 x i16> 
{{.*}}, <8 x i16> {{.*}}, <4 x i32> + // CHECK: zext <4 x i16> {{.*}} to <4 x i32> return _mm_cvtepu16_epi32(a); } __m128i test_mm_cvtepu16_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu16_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}}) + // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> + // CHECK: zext <2 x i16> {{.*}} to <2 x i64> return _mm_cvtepu16_epi64(a); } __m128i test_mm_cvtepu32_epi64(__m128i a) { // CHECK-LABEL: test_mm_cvtepu32_epi64 - // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}}) + // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> + // CHECK: zext <2 x i32> {{.*}} to <2 x i64> return _mm_cvtepu32_epi64(a); } Index: test/CodeGen/builtins-x86.c === --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -387,12 +387,6 @@ tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i); tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i); tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s); - tmp_V4i = __builtin_ia32_pmovzxbd128(tmp_V16c); - tmp_V2LLi = __builtin_ia32_pmovzxbq128(tmp_V16c); - tmp_V8s = __builtin_ia32_pmovzxbw128(tmp_V16c); - tmp_V2LLi = __builtin_ia32_pmovzxdq128(tmp_V4i); - tmp_V4i = __builtin_ia32_pmovzxwd128(tmp_V8s); - tmp_V2LLi = __builtin_ia32_pmovzxwq128(tmp_V8s); tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i); tmp_V4i = __builtin_ia32_pmulld128(tmp_V4i, tmp_V4i); tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16); Index: test/CodeGen/avx2-builtins.c === --- test/CodeGen/avx2-builtins.c +++ test/CodeGen/avx2-builtins.c @@ -292,73 +292,79 @@ __m256i test_mm256_cvtepi8_epi16(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi16 - // CHECK: call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %{{.*}}) + // CHECK: sext <16 x i8> %{{.*}} to <16 x i16> return _mm256_cvtepi8_epi16(a); } __m256i test_mm256_cvtepi8_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> 
%{{.*}}) + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i32> + // CHECK: sext <8 x i8> %{{.*}} to <8 x i32> return _mm256_cvtepi8_epi32(a); } __m256i test_mm256_cvtepi8_epi64(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %{{.*}}) + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <4 x i32> + // CHECK: sext <4 x i8> %{{.*}} to <4 x i64> return _mm256_cvtepi8_epi64(a); } __m256i test_mm256_cvtepi16_epi32(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi16_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %{{.*}}) +
r270836 - [X86][F16C] Improved f16c intrinsics checks
Author: rksimon Date: Thu May 26 05:20:25 2016 New Revision: 270836 URL: http://llvm.org/viewvc/llvm-project?rev=270836=rev Log: [X86][F16C] Improved f16c intrinsics checks Added checks for upper elements being zero'd in scalar conversions Modified: cfe/trunk/test/CodeGen/f16c-builtins.c Modified: cfe/trunk/test/CodeGen/f16c-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/f16c-builtins.c?rev=270836=270835=270836=diff == --- cfe/trunk/test/CodeGen/f16c-builtins.c (original) +++ cfe/trunk/test/CodeGen/f16c-builtins.c Thu May 26 05:20:25 2016 @@ -7,36 +7,50 @@ float test_cvtsh_ss(unsigned short a) { // CHECK-LABEL: test_cvtsh_ss - // CHECK: @llvm.x86.vcvtph2ps.128 + // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7 + // CHECK: call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %{{.*}}) + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 return _cvtsh_ss(a); } unsigned short test_cvtss_sh(float a) { // CHECK-LABEL: test_cvtss_sh - // CHECK: @llvm.x86.vcvtps2ph.128 + // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.00e+00, i32 1 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.00e+00, i32 2 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.00e+00, i32 3 + // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0) + // CHECK: extractelement <8 x i16> %{{.*}}, i32 0 return _cvtss_sh(a, 0); } __m128 test_mm_cvtph_ps(__m128i a) { // CHECK-LABEL: test_mm_cvtph_ps - // CHECK: @llvm.x86.vcvtph2ps.128 + // CHECK: call <4 x float> 
@llvm.x86.vcvtph2ps.128(<8 x i16> %{{.*}}) return _mm_cvtph_ps(a); } __m256 test_mm256_cvtph_ps(__m128i a) { // CHECK-LABEL: test_mm256_cvtph_ps - // CHECK: @llvm.x86.vcvtph2ps.256 + // CHECK: call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %{{.*}}) return _mm256_cvtph_ps(a); } __m128i test_mm_cvtps_ph(__m128 a) { // CHECK-LABEL: test_mm_cvtps_ph - // CHECK: @llvm.x86.vcvtps2ph.128 + // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0) return _mm_cvtps_ph(a, 0); } __m128i test_mm256_cvtps_ph(__m256 a) { // CHECK-LABEL: test_mm256_cvtps_ph - // CHECK: @llvm.x86.vcvtps2ph.256 + // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0) return _mm256_cvtps_ph(a, 0); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270833 - [X86][AVX2] Improved checks for float/double mask generation for non-masked gathers
Author: rksimon Date: Thu May 26 04:56:50 2016 New Revision: 270833 URL: http://llvm.org/viewvc/llvm-project?rev=270833=rev Log: [X86][AVX2] Improved checks for float/double mask generation for non-masked gathers Modified: cfe/trunk/test/CodeGen/avx2-builtins.c Modified: cfe/trunk/test/CodeGen/avx2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=270833=270832=270833=diff == --- cfe/trunk/test/CodeGen/avx2-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx2-builtins.c Thu May 26 04:56:50 2016 @@ -467,6 +467,7 @@ __m256i test_mm256_mask_i32gather_epi64( __m128d test_mm_i32gather_pd(double const *b, __m128i c) { // CHECK-LABEL: test_mm_i32gather_pd + // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0) // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2) return _mm_i32gather_pd(b, c, 2); } @@ -479,6 +480,7 @@ __m128d test_mm_mask_i32gather_pd(__m128 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) { // CHECK-LABEL: test_mm256_i32gather_pd + // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0) // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2) return _mm256_i32gather_pd(b, c, 2); } @@ -491,6 +493,7 @@ __m256d test_mm256_mask_i32gather_pd(__m __m128 test_mm_i32gather_ps(float const *b, __m128i c) { // CHECK-LABEL: test_mm_i32gather_ps + // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0) // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2) return _mm_i32gather_ps(b, c, 2); } @@ -503,6 +506,7 @@ __m128 test_mm_mask_i32gather_ps(__m128 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) { // CHECK-LABEL: 
test_mm256_i32gather_ps + // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 0) // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2) return _mm256_i32gather_ps(b, c, 2); } @@ -563,6 +567,7 @@ __m256i test_mm256_mask_i64gather_epi64( __m128d test_mm_i64gather_pd(double const *b, __m128i c) { // CHECK-LABEL: test_mm_i64gather_pd + // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0) // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> undef, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2) return _mm_i64gather_pd(b, c, 2); } @@ -575,6 +580,7 @@ __m128d test_mm_mask_i64gather_pd(__m128 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) { // CHECK-LABEL: test_mm256_i64gather_pd + // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0) // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2) return _mm256_i64gather_pd(b, c, 2); } @@ -587,6 +593,7 @@ __m256d test_mm256_mask_i64gather_pd(__m __m128 test_mm_i64gather_ps(float const *b, __m128i c) { // CHECK-LABEL: test_mm_i64gather_ps + // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0) // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> undef, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2) return _mm_i64gather_ps(b, c, 2); } @@ -599,6 +606,7 @@ __m128 test_mm_mask_i64gather_ps(__m128 __m128 test_mm256_i64gather_ps(float const *b, __m256i c) { // CHECK-LABEL: test_mm256_i64gather_ps + // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0) // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2) return 
_mm256_i64gather_ps(b, c, 2); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
RKSimon added a comment. In http://reviews.llvm.org/D20617#439200, @craig.topper wrote: > Can you double check gcc's xmmintrin.h again. I'm pretty sure _mm_store1_ps > is calling _mm_storeu_ps. Yes you're right - for gcc _mm_store1_pd is aligned (and there is a comment saying it must be), but _mm_store1_ps is unaligned. The intel intrinsics docs and msvc codegen both set both ps and pd versions to aligned store though. If you wish I can just do the pd fixes - we are alone in doing an extract + 2*movsd - the rest all use shufpd+movapd Suggestions for ps? Repository: rL LLVM http://reviews.llvm.org/D20617 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270708 - [X86][AVX2] Full set of AVX2 intrinsics tests
Author: rksimon Date: Wed May 25 10:10:49 2016 New Revision: 270708 URL: http://llvm.org/viewvc/llvm-project?rev=270708=rev Log: [X86][AVX2] Full set of AVX2 intrinsics tests llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll will be synced to this Modified: cfe/trunk/test/CodeGen/avx2-builtins.c Modified: cfe/trunk/test/CodeGen/avx2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=270708=270707=270708=diff == --- cfe/trunk/test/CodeGen/avx2-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx2-builtins.c Wed May 25 10:10:49 2016 @@ -4,179 +4,113 @@ // Don't include mm_malloc.h, it's system specific. #define __MM_MALLOC_H -#include +#include -__m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) { - // CHECK: @llvm.x86.avx2.mpsadbw({{.*}}, {{.*}}, i8 3) - return _mm256_mpsadbw_epu8(x, y, 3); -} - -__m256i test_mm256_sad_epu8(__m256i x, __m256i y) { - // CHECK: @llvm.x86.avx2.psad.bw - return _mm256_sad_epu8(x, y); -} +// NOTE: This should match the tests in llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll __m256i test_mm256_abs_epi8(__m256i a) { - // CHECK: @llvm.x86.avx2.pabs.b + // CHECK-LABEL: test_mm256_abs_epi8 + // CHECK: call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %{{.*}}) return _mm256_abs_epi8(a); } __m256i test_mm256_abs_epi16(__m256i a) { - // CHECK: @llvm.x86.avx2.pabs.w + // CHECK-LABEL: test_mm256_abs_epi16 + // CHECK: call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %{{.*}}) return _mm256_abs_epi16(a); } __m256i test_mm256_abs_epi32(__m256i a) { - // CHECK: @llvm.x86.avx2.pabs.d + // CHECK-LABEL: test_mm256_abs_epi32 + // CHECK: call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %{{.*}}) return _mm256_abs_epi32(a); } -__m256i test_mm256_packs_epi16(__m256i a, __m256i b) { - // CHECK: @llvm.x86.avx2.packsswb - return _mm256_packs_epi16(a, b); -} - -__m256i test_mm256_packs_epi32(__m256i a, __m256i b) { - // CHECK: @llvm.x86.avx2.packssdw - return _mm256_packs_epi32(a, b); -} - -__m256i 
test_mm256_packs_epu16(__m256i a, __m256i b) { - // CHECK: @llvm.x86.avx2.packuswb - return _mm256_packus_epi16(a, b); -} - -__m256i test_mm256_packs_epu32(__m256i a, __m256i b) { - // CHECK: @llvm.x86.avx2.packusdw - return _mm256_packus_epi32(a, b); -} - __m256i test_mm256_add_epi8(__m256i a, __m256i b) { + // CHECK-LABEL: test_mm256_add_epi8 // CHECK: add <32 x i8> return _mm256_add_epi8(a, b); } __m256i test_mm256_add_epi16(__m256i a, __m256i b) { + // CHECK-LABEL: test_mm256_add_epi16 // CHECK: add <16 x i16> return _mm256_add_epi16(a, b); } __m256i test_mm256_add_epi32(__m256i a, __m256i b) { + // CHECK-LABEL: test_mm256_add_epi32 // CHECK: add <8 x i32> return _mm256_add_epi32(a, b); } __m256i test_mm256_add_epi64(__m256i a, __m256i b) { + // CHECK-LABEL: test_mm256_add_epi64 // CHECK: add <4 x i64> return _mm256_add_epi64(a, b); } __m256i test_mm256_adds_epi8(__m256i a, __m256i b) { - // CHECK: @llvm.x86.avx2.padds.b + // CHECK-LABEL: test_mm256_adds_epi8 + // CHECK: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_adds_epi8(a, b); } __m256i test_mm256_adds_epi16(__m256i a, __m256i b) { - // CHECK: @llvm.x86.avx2.padds.w + // CHECK-LABEL: test_mm256_adds_epi16 + // CHECK: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_adds_epi16(a, b); } __m256i test_mm256_adds_epu8(__m256i a, __m256i b) { - // CHECK: @llvm.x86.avx2.paddus.b + // CHECK-LABEL: test_mm256_adds_epu8 + // CHECK: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_adds_epu8(a, b); } __m256i test_mm256_adds_epu16(__m256i a, __m256i b) { - // CHECK: @llvm.x86.avx2.paddus.w + // CHECK-LABEL: test_mm256_adds_epu16 + // CHECK: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_adds_epu16(a, b); } __m256i test_mm256_alignr_epi8(__m256i a, __m256i b) { + // CHECK-LABEL: test_mm256_alignr_epi8 // CHECK: shufflevector <32 x i8> %{{.*}}, 
<32 x i8> %{{.*}}, <32 x i32> return _mm256_alignr_epi8(a, b, 2); } __m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) { + // CHECK-LABEL: test2_mm256_alignr_epi8 // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> return _mm256_alignr_epi8(a, b, 17); } -__m256i test_mm256_sub_epi8(__m256i a, __m256i b) { - // CHECK: sub <32 x i8> - return _mm256_sub_epi8(a, b); -} - -__m256i test_mm256_sub_epi16(__m256i a, __m256i b) { - // CHECK: sub <16 x i16> - return _mm256_sub_epi16(a, b); -} - -__m256i test_mm256_sub_epi32(__m256i a, __m256i b) { - // CHECK: sub <8 x i32> - return _mm256_sub_epi32(a, b); -} - -__m256i test_mm256_sub_epi64(__m256i a, __m256i b) { - // CHECK: sub <4 x i64> - return _mm256_sub_epi64(a, b); -} -
[PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
RKSimon created this revision. RKSimon added reviewers: craig.topper, spatel, andreadb. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. According to the gcc headers, intel intrinsics docs and msdn codegen the _mm_store1_ps/_mm_store1_pd (and their _mm_store_ps1/_mm_store_pd1 analogues) should require an aligned pointer - the clang headers are the only implementation I can find that assume non-aligned stores (by storing with _mm_storeu_ps/_mm_storeu_pd). This patch raises the alignment requirements to match the other implementations by calling _mm_store_ps/_mm_store_pd instead. I've also added the missing _mm_store_pd1 intrinsic (which maps to _mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps). As a followup I'll update the llvm fast-isel tests to match this codegen. Repository: rL LLVM http://reviews.llvm.org/D20617 Files: lib/Headers/emmintrin.h lib/Headers/xmmintrin.h test/CodeGen/sse-builtins.c test/CodeGen/sse2-builtins.c Index: test/CodeGen/sse2-builtins.c === --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -1205,6 +1205,13 @@ _mm_store_pd(A, B); } +void test_mm_store_pd1(double* x, __m128d y) { + // CHECK-LABEL: test_mm_store_pd1 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 + _mm_store_pd1(x, y); +} + void test_mm_store_sd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -1220,9 +1227,8 @@ void test_mm_store1_pd(double* x, __m128d y) { // CHECK-LABEL: test_mm_store1_pd - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} - // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 
_mm_store1_pd(x, y); } Index: test/CodeGen/sse-builtins.c === --- test/CodeGen/sse-builtins.c +++ test/CodeGen/sse-builtins.c @@ -651,7 +651,7 @@ void test_mm_store_ps1(float* x, __m128 y) { // CHECK-LABEL: test_mm_store_ps1 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}}) + // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16 _mm_store_ps1(x, y); } @@ -665,7 +665,7 @@ void test_mm_store1_ps(float* x, __m128 y) { // CHECK-LABEL: test_mm_store1_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}}) + // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16 _mm_store1_ps(x, y); } Index: lib/Headers/xmmintrin.h === --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -1590,22 +1590,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store1_ps(float *__p, __m128 __a) +_mm_store_ps(float *__p, __m128 __a) { - __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); - _mm_storeu_ps(__p, __a); + *(__m128 *)__p = __a; } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps1(float *__p, __m128 __a) +_mm_store1_ps(float *__p, __m128 __a) { -return _mm_store1_ps(__p, __a); + __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); + _mm_store_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_ps(float *__p, __m128 __a) +_mm_store_ps1(float *__p, __m128 __a) { - *(__m128 *)__p = __a; + return _mm_store1_ps(__p, __a); } static __inline__ void __DEFAULT_FN_ATTRS Index: lib/Headers/emmintrin.h === --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -582,19 +582,22 @@ } static __inline__ void __DEFAULT_FN_ATTRS +_mm_store_pd(double *__dp, __m128d __a) +{ + *(__m128d *)__dp = __a; +} + +static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, 
__m128d __a) { - struct __mm_store1_pd_struct { -double __u[2]; - } __attribute__((__packed__, __may_alias__)); - ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; - ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; + __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); + _mm_store_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS -_mm_store_pd(double *__dp, __m128d __a) +_mm_store_pd1(double *__dp, __m128d __a) { - *(__m128d *)__dp = __a; + return _mm_store1_pd(__dp, __a); } static __inline__ void __DEFAULT_FN_ATTRS ___
r270679 - [X86][SSE] Updated _mm_store_ps1 test to match _mm_store1_ps
Author: rksimon Date: Wed May 25 04:20:08 2016 New Revision: 270679 URL: http://llvm.org/viewvc/llvm-project?rev=270679&view=rev Log: [X86][SSE] Updated _mm_store_ps1 test to match _mm_store1_ps Modified: cfe/trunk/test/CodeGen/sse-builtins.c Modified: cfe/trunk/test/CodeGen/sse-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=270679&r1=270678&r2=270679&view=diff == --- cfe/trunk/test/CodeGen/sse-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse-builtins.c Wed May 25 04:20:08 2016 @@ -651,7 +651,7 @@ void test_mm_store_ps(float* x, __m128 y void test_mm_store_ps1(float* x, __m128 y) { // CHECK-LABEL: test_mm_store_ps1 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer - // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16 + // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}}) _mm_store_ps1(x, y); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR
This revision was automatically updated to reflect the committed changes. Closed by commit rL270499: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR (authored by RKSimon). Changed prior to commit: http://reviews.llvm.org/D20528?vs=58146=58160#toc Repository: rL LLVM http://reviews.llvm.org/D20528 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse2-builtins.c cfe/trunk/test/CodeGen/target-builtin-error-2.c Index: cfe/trunk/test/CodeGen/target-builtin-error-2.c === --- cfe/trunk/test/CodeGen/target-builtin-error-2.c +++ cfe/trunk/test/CodeGen/target-builtin-error-2.c @@ -5,9 +5,9 @@ // Since we do code generation on a function level this needs to error out since // the subtarget feature won't be available. -__m256d wombat(__m128i a) { +__m128 wombat(__m128i a) { if (__builtin_cpu_supports("avx")) -return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}} +return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}} else -return (__m256d){0, 0, 0, 0}; +return (__m128){0, 0}; } Index: cfe/trunk/test/CodeGen/sse2-builtins.c === --- cfe/trunk/test/CodeGen/sse2-builtins.c +++ cfe/trunk/test/CodeGen/sse2-builtins.c @@ -415,7 +415,8 @@ __m128d test_mm_cvtepi32_pd(__m128i A) { // CHECK-LABEL: test_mm_cvtepi32_pd - // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}}) + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> + // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> return _mm_cvtepi32_pd(A); } @@ -445,7 +446,8 @@ __m128d test_mm_cvtps_pd(__m128 A) { // CHECK-LABEL: test_mm_cvtps_pd - // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}}) + // CHECK: 
shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> + // CHECK: fpext <2 x float> %{{.*}} to <2 x double> return _mm_cvtps_pd(A); } Index: cfe/trunk/test/CodeGen/avx-builtins.c === --- cfe/trunk/test/CodeGen/avx-builtins.c +++ cfe/trunk/test/CodeGen/avx-builtins.c @@ -250,7 +250,7 @@ __m256d test_mm256_cvtepi32_pd(__m128i A) { // CHECK-LABEL: test_mm256_cvtepi32_pd - // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}}) + // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> return _mm256_cvtepi32_pd(A); } @@ -280,7 +280,7 @@ __m256d test_mm256_cvtps_pd(__m128 A) { // CHECK-LABEL: test_mm256_cvtps_pd - // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}}) + // CHECK: fpext <4 x float> %{{.*}} to <4 x double> return _mm256_cvtps_pd(A); } Index: cfe/trunk/test/CodeGen/builtins-x86.c === --- cfe/trunk/test/CodeGen/builtins-x86.c +++ cfe/trunk/test/CodeGen/builtins-x86.c @@ -325,7 +325,6 @@ tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d); tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d); - tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i); tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i); tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d); tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d); @@ -338,7 +337,6 @@ tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); - tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f); tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); @@ -423,11 +421,9 @@ tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7); tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0); tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0); - tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i); tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); - tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f); tmp_V4i = 
__builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); Index: cfe/trunk/lib/Headers/avxintrin.h === --- cfe/trunk/lib/Headers/avxintrin.h +++ cfe/trunk/lib/Headers/avxintrin.h @@ -2050,7 +2050,7 @@ static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a) { - return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a); + return (__m256d)__builtin_convertvector((__v4si)__a, __v4df); } /// \brief Converts a
r270499 - [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR
Author: rksimon Date: Mon May 23 17:13:02 2016 New Revision: 270499 URL: http://llvm.org/viewvc/llvm-project?rev=270499=rev Log: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR Both the (V)CVTDQ2PD(Y) (i32 to f64) and (V)CVTPS2PD(Y) (f32 to f64) conversion instructions are lossless and can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics without affecting final codegen. This patch removes the clang builtins and their use in the sse2/avx headers - a future patch will deal with removing the llvm intrinsics, but that will require a bit more work. Differential Revision: http://reviews.llvm.org/D20528 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse2-builtins.c cfe/trunk/test/CodeGen/target-builtin-error-2.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=270499=270498=270499=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 23 17:13:02 2016 @@ -330,7 +330,6 @@ TARGET_BUILTIN(__builtin_ia32_movntdq, " TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd, "V2dV4i", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps, "V4fV4i", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2") @@ -338,7 +337,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") 
TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtps2pd, "V2dV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") @@ -466,11 +464,9 @@ TARGET_BUILTIN(__builtin_ia32_blendvps25 TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd256, "V4dV4i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtps2pd256, "V4dV4f", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx") Modified: cfe/trunk/lib/Headers/avxintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=270499=270498=270499=diff == --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Mon May 23 17:13:02 2016 @@ -2050,7 +2050,7 @@ _mm256_insert_epi64(__m256i __a, long lo static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a) { - return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a); + return (__m256d)__builtin_convertvector((__v4si)__a, __v4df); } /// \brief Converts a vector of [8 x i32] into a vector of [8 x float]. 
@@ -2102,7 +2102,7 @@ _mm256_cvtps_epi32(__m256 __a) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a) { - return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a); + return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df); } static __inline __m128i __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=270499=270498=270499=diff == --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Mon May 23 17:13:02 2016 @@ -386,13 +386,15 @@ _mm_cvtpd_ps(__m128d __a) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { - return __builtin_ia32_cvtps2pd((__v4sf)__a); + return (__m128d) __builtin_convertvector( + __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { - return __builtin_ia32_cvtdq2pd((__v4si)__a); + return (__m128d) __builtin_convertvector( +
Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR
RKSimon added a comment. In http://reviews.llvm.org/D20528#437165, @mkuper wrote: > Presumably, the fast-isel lowering of the IR pattern is already correct, and > in any case, it isn't affected by this patch. > I just want to make sure we don't regress the optimized DAG codegen - that > is, it still produces the instruction we'd expect from the intrinsic (or > something at least as good). The existing llvm\test\CodeGen\X86\vec_fpext.ll and llvm\test\CodeGen\X86\vec_int_to_fp.ll already demonstrate the correct optimized DAG codegen using the same IR as output in the clang\test\CodeGen\*-builtins.c here. Also, the aim is to keep the llvm\test\CodeGen\X86\*-intrinsics-fast-isel.ll tests in sync with the llvm\tools\clang\test\CodeGen\*-builtins.c equivalents. Repository: rL LLVM http://reviews.llvm.org/D20528 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR
RKSimon added a comment. In http://reviews.llvm.org/D20528#437117, @mkuper wrote: > Sorry, I didn't intend to imply the rest of the llvm work is necessary for > this to go in. Just that I'd be happier with this patch knowing that we have > a regression test for doing the (shuffle + fpext, say) lowering correctly. I > didn't even mean fast-isel, only the DAG. The fast-isel tests are the most self contained (and are useful to show the non-optimized codegen for every intrinsic in the headers). I can submit them now if you wish. Repository: rL LLVM http://reviews.llvm.org/D20528 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR
RKSimon updated this revision to Diff 58146. Repository: rL LLVM http://reviews.llvm.org/D20528 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avxintrin.h lib/Headers/emmintrin.h test/CodeGen/avx-builtins.c test/CodeGen/builtins-x86.c test/CodeGen/sse2-builtins.c test/CodeGen/target-builtin-error-2.c Index: test/CodeGen/target-builtin-error-2.c === --- test/CodeGen/target-builtin-error-2.c +++ test/CodeGen/target-builtin-error-2.c @@ -5,9 +5,9 @@ // Since we do code generation on a function level this needs to error out since // the subtarget feature won't be available. -__m256d wombat(__m128i a) { +__m128 wombat(__m128i a) { if (__builtin_cpu_supports("avx")) -return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}} +return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}} else -return (__m256d){0, 0, 0, 0}; +return (__m128){0, 0}; } Index: test/CodeGen/sse2-builtins.c === --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -415,7 +415,8 @@ __m128d test_mm_cvtepi32_pd(__m128i A) { // CHECK-LABEL: test_mm_cvtepi32_pd - // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}}) + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> + // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> return _mm_cvtepi32_pd(A); } @@ -445,7 +446,8 @@ __m128d test_mm_cvtps_pd(__m128 A) { // CHECK-LABEL: test_mm_cvtps_pd - // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}}) + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> + // CHECK: fpext <2 x float> %{{.*}} to <2 x double> return _mm_cvtps_pd(A); } Index: test/CodeGen/builtins-x86.c === --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -325,7 +325,6 @@ tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d); 
tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d); - tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i); tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i); tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d); tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d); @@ -338,7 +337,6 @@ tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); - tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f); tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); @@ -423,11 +421,9 @@ tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7); tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0); tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0); - tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i); tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); - tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f); tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); Index: test/CodeGen/avx-builtins.c === --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -250,7 +250,7 @@ __m256d test_mm256_cvtepi32_pd(__m128i A) { // CHECK-LABEL: test_mm256_cvtepi32_pd - // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}}) + // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> return _mm256_cvtepi32_pd(A); } @@ -280,7 +280,7 @@ __m256d test_mm256_cvtps_pd(__m128 A) { // CHECK-LABEL: test_mm256_cvtps_pd - // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}}) + // CHECK: fpext <4 x float> %{{.*}} to <4 x double> return _mm256_cvtps_pd(A); } Index: lib/Headers/emmintrin.h === --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -386,13 +386,15 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { - return __builtin_ia32_cvtps2pd((__v4sf)__a); + return (__m128d) __builtin_convertvector( + 
__builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { - return __builtin_ia32_cvtdq2pd((__v4si)__a); + return (__m128d) __builtin_convertvector( + __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); } static __inline__ __m128i __DEFAULT_FN_ATTRS Index: lib/Headers/avxintrin.h === --- lib/Headers/avxintrin.h +++
Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR
RKSimon added a comment. In http://reviews.llvm.org/D20528#436893, @mkuper wrote: > This looks right, but we may lose some end-to-end tests, since right now we > have a clang-level test that checks the builtin is lowered to the intrinsic, > and (hopefully) a CG-level test that the intrinsic is lowered to the correct > instruction. > Do you know if there are already CG tests that check we correctly lower > these IR patterns to CVTPS2PD, etc? If not, could you add them? I do have the relevant changes for llvm\test\CodeGen\X86\sse2-intrinsics-fast-isel.ll and llvm\test\CodeGen\X86\avx-intrinsics-fast-isel.ll (I spent most of last week adding them all.). Do you want me to setup a separate llvm patch for review? I'm not ready to do the rest of the llvm work (removal of the llvm intrinsics / auto-upgrade etc.). but the fast-isel changes are very simple. Comment at: lib/Headers/emmintrin.h:390 @@ -390,1 +389,3 @@ + return (__m128d) __builtin_convertvector( + __builtin_shufflevector((__v4sf __a, (__v4sf)__a, 0, 1), __v2df); } mkuper wrote: > It looks like there's a missing paren after the first __v4sf. > How does the test compile? Or am I misreading? Sorry, that's me 'fixing' clang-format which I stupidly forgot to run until just before submission. Repository: rL LLVM http://reviews.llvm.org/D20528 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR
RKSimon created this revision. RKSimon added reviewers: qcolombet, craig.topper, mkuper, andreadb, spatel. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. Both the (V)CVTDQ2PD(Y) (i32 to f64) and (V)CVTPS2PD(Y) (f32 to f64) conversion instructions are lossless and can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics. This patch removes the clang builtins and their use in the sse2/avx headers - a future patch will deal with removing the llvm intrinsics, but that will require a bit more work. Repository: rL LLVM http://reviews.llvm.org/D20528 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avxintrin.h lib/Headers/emmintrin.h test/CodeGen/avx-builtins.c test/CodeGen/builtins-x86.c test/CodeGen/sse2-builtins.c test/CodeGen/target-builtin-error-2.c Index: test/CodeGen/target-builtin-error-2.c === --- test/CodeGen/target-builtin-error-2.c +++ test/CodeGen/target-builtin-error-2.c @@ -5,9 +5,9 @@ // Since we do code generation on a function level this needs to error out since // the subtarget feature won't be available. 
-__m256d wombat(__m128i a) { +__m128 wombat(__m128i a) { if (__builtin_cpu_supports("avx")) -return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}} +return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}} else -return (__m256d){0, 0, 0, 0}; +return (__m128){0, 0}; } Index: test/CodeGen/sse2-builtins.c === --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -415,7 +415,8 @@ __m128d test_mm_cvtepi32_pd(__m128i A) { // CHECK-LABEL: test_mm_cvtepi32_pd - // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}}) + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> + // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> return _mm_cvtepi32_pd(A); } @@ -445,7 +446,8 @@ __m128d test_mm_cvtps_pd(__m128 A) { // CHECK-LABEL: test_mm_cvtps_pd - // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}}) + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> + // CHECK: fpext <2 x float> %{{.*}} to <2 x double> return _mm_cvtps_pd(A); } Index: test/CodeGen/builtins-x86.c === --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -325,7 +325,6 @@ tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d); tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d); - tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i); tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i); tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d); tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d); @@ -338,7 +337,6 @@ tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); - tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f); tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); @@ -423,11 +421,9 @@ tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7); 
tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0); tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0); - tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i); tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); - tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f); tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); Index: test/CodeGen/avx-builtins.c === --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -250,7 +250,7 @@ __m256d test_mm256_cvtepi32_pd(__m128i A) { // CHECK-LABEL: test_mm256_cvtepi32_pd - // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}}) + // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> return _mm256_cvtepi32_pd(A); } @@ -280,7 +280,7 @@ __m256d test_mm256_cvtps_pd(__m128 A) { // CHECK-LABEL: test_mm256_cvtps_pd - // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}}) + // CHECK: fpext <4 x float> %{{.*}} to <4 x double> return _mm256_cvtps_pd(A); } Index: lib/Headers/emmintrin.h === --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -386,13 +386,15 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { - return __builtin_ia32_cvtps2pd((__v4sf)__a); + return (__m128d)
Re: [PATCH] D20468: [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16
This revision was automatically updated to reflect the committed changes. Closed by commit rL270330: [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16 (authored by RKSimon). Changed prior to commit: http://reviews.llvm.org/D20468?vs=57927=58045#toc Repository: rL LLVM http://reviews.llvm.org/D20468 Files: cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/test/CodeGen/avx-builtins.c Index: cfe/trunk/test/CodeGen/avx-builtins.c === --- cfe/trunk/test/CodeGen/avx-builtins.c +++ cfe/trunk/test/CodeGen/avx-builtins.c @@ -314,21 +314,19 @@ return _mm256_dp_ps(A, B, 7); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi8(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi8 // CHECK: and i32 %{{.*}}, 31 // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}} - // CHECK: ext i8 %{{.*}} to i32 + // CHECK: zext i8 %{{.*}} to i32 return _mm256_extract_epi8(A, 32); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 // CHECK: and i32 %{{.*}}, 15 // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}} - // CHECK: ext i16 %{{.*}} to i32 + // CHECK: zext i16 %{{.*}} to i32 return _mm256_extract_epi16(A, 16); } Index: cfe/trunk/lib/Headers/avxintrin.h === --- cfe/trunk/lib/Headers/avxintrin.h +++ cfe/trunk/lib/Headers/avxintrin.h @@ -1875,13 +1875,13 @@ /// \param __imm ///An immediate integer operand with bits [3:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 16 bits of extended +/// \returns A 32-bit integer containing the extracted 16 bits of zero extended ///packed data. 
static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16(__m256i __a, const int __imm) { __v16hi __b = (__v16hi)__a; - return __b[__imm & 15]; + return (unsigned short)__b[__imm & 15]; } /// \brief Takes a [32 x i8] vector and returns the vector element value @@ -1897,13 +1897,13 @@ /// \param __imm ///An immediate integer operand with bits [4:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 8 bits of extended packed -///data. +/// \returns A 32-bit integer containing the extracted 8 bits of zero extended +///packed data. static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8(__m256i __a, const int __imm) { __v32qi __b = (__v32qi)__a; - return __b[__imm & 31]; + return (unsigned char)__b[__imm & 31]; } #ifdef __x86_64__ Index: cfe/trunk/test/CodeGen/avx-builtins.c === --- cfe/trunk/test/CodeGen/avx-builtins.c +++ cfe/trunk/test/CodeGen/avx-builtins.c @@ -314,21 +314,19 @@ return _mm256_dp_ps(A, B, 7); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi8(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi8 // CHECK: and i32 %{{.*}}, 31 // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}} - // CHECK: ext i8 %{{.*}} to i32 + // CHECK: zext i8 %{{.*}} to i32 return _mm256_extract_epi8(A, 32); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 // CHECK: and i32 %{{.*}}, 15 // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}} - // CHECK: ext i16 %{{.*}} to i32 + // CHECK: zext i16 %{{.*}} to i32 return _mm256_extract_epi16(A, 16); } Index: cfe/trunk/lib/Headers/avxintrin.h === --- cfe/trunk/lib/Headers/avxintrin.h +++ cfe/trunk/lib/Headers/avxintrin.h @@ -1875,13 +1875,13 @@ /// \param __imm ///An immediate integer operand with bits [3:0] determining which vector ///element is extracted and returned. 
-/// \returns A 32-bit integer containing the extracted 16 bits of extended +/// \returns A 32-bit integer containing the extracted 16 bits of zero extended ///packed data. static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16(__m256i __a, const int __imm) { __v16hi __b = (__v16hi)__a; - return __b[__imm & 15]; + return (unsigned short)__b[__imm & 15]; } /// \brief Takes a [32 x i8] vector and returns the vector element value @@ -1897,13 +1897,13 @@ /// \param __imm ///An immediate integer operand with bits [4:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 8 bits of extended packed -///data. +/// \returns A 32-bit integer containing the extracted 8 bits of zero extended +///packed data. static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8(__m256i __a, const int __imm) { __v32qi __b = (__v32qi)__a; - return __b[__imm & 31]; + return (unsigned char)__b[__imm & 31]; } #ifdef
r270330 - [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16
Author: rksimon Date: Sat May 21 16:14:35 2016 New Revision: 270330 URL: http://llvm.org/viewvc/llvm-project?rev=270330&view=rev Log: [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16 Ensure _mm256_extract_epi8 and _mm256_extract_epi16 zero extend their i8/i16 result to i32. This matches _mm_extract_epi8 and _mm_extract_epi16. Fix for PR27594 Differential Revision: http://reviews.llvm.org/D20468 Modified: cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/test/CodeGen/avx-builtins.c Modified: cfe/trunk/lib/Headers/avxintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=270330&r1=270329&r2=270330&view=diff == --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Sat May 21 16:14:35 2016 @@ -1875,13 +1875,13 @@ _mm256_extract_epi32(__m256i __a, const /// \param __imm ///An immediate integer operand with bits [3:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 16 bits of extended +/// \returns A 32-bit integer containing the extracted 16 bits of zero extended ///packed data. static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16(__m256i __a, const int __imm) { __v16hi __b = (__v16hi)__a; - return __b[__imm & 15]; + return (unsigned short)__b[__imm & 15]; } /// \brief Takes a [32 x i8] vector and returns the vector element value @@ -1897,13 +1897,13 @@ _mm256_extract_epi16(__m256i __a, const /// \param __imm ///An immediate integer operand with bits [4:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 8 bits of extended packed -///data. +/// \returns A 32-bit integer containing the extracted 8 bits of zero extended +///packed data.
static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8(__m256i __a, const int __imm) { __v32qi __b = (__v32qi)__a; - return __b[__imm & 31]; + return (unsigned char)__b[__imm & 31]; } #ifdef __x86_64__ Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=270330=270329=270330=diff == --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Sat May 21 16:14:35 2016 @@ -314,21 +314,19 @@ __m256 test_mm256_dp_ps(__m256 A, __m256 return _mm256_dp_ps(A, B, 7); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi8(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi8 // CHECK: and i32 %{{.*}}, 31 // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}} - // CHECK: ext i8 %{{.*}} to i32 + // CHECK: zext i8 %{{.*}} to i32 return _mm256_extract_epi8(A, 32); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 // CHECK: and i32 %{{.*}}, 15 // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}} - // CHECK: ext i16 %{{.*}} to i32 + // CHECK: zext i16 %{{.*}} to i32 return _mm256_extract_epi16(A, 16); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20468: [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16
RKSimon added a comment. In http://reviews.llvm.org/D20468#435522, @mkuper wrote: > Could you point me to where in the documentation it says they must be > zero-extended? > The Intel intrinsics guide actually has them with shorter return types: > > __int8 _mm256_extract_epi8 (__m256i a, const int index) > __int16 _mm256_extract_epi16 (__m256i a, const int index) And the gcc version has them wrapped to the _mm_extract_epi* intrinsics which map to the real 128-bit instructions which do zero-extend. I'm open to changing the return types in the headers instead, but really I'd expect the mm256 versions to zero extend like the older mm versions. Repository: rL LLVM http://reviews.llvm.org/D20468 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270227 - [X86][AVX] Added _mm256_testc_si256/_mm256_testnzc_si256/_mm256_testz_si256 tests
Author: rksimon Date: Fri May 20 10:49:17 2016 New Revision: 270227 URL: http://llvm.org/viewvc/llvm-project?rev=270227=rev Log: [X86][AVX] Added _mm256_testc_si256/_mm256_testnzc_si256/_mm256_testz_si256 tests Modified: cfe/trunk/test/CodeGen/avx-builtins.c Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=270227=270226=270227=diff == --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Fri May 20 10:49:17 2016 @@ -1253,6 +1253,12 @@ int test_mm256_testc_ps(__m256 A, __m256 return _mm256_testc_ps(A, B); } +int test_mm256_testc_si256(__m256 A, __m256 B) { + // CHECK-LABEL: test_mm256_testc_si256 + // CHECK: call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + return _mm256_testc_si256(A, B); +} + int test_mm_testnzc_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testnzc_pd // CHECK: call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) @@ -1277,6 +1283,12 @@ int test_mm256_testnzc_ps(__m256 A, __m2 return _mm256_testnzc_ps(A, B); } +int test_mm256_testnzc_si256(__m256 A, __m256 B) { + // CHECK-LABEL: test_mm256_testnzc_si256 + // CHECK: call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + return _mm256_testnzc_si256(A, B); +} + int test_mm_testz_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testz_pd // CHECK: call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) @@ -1301,6 +1313,12 @@ int test_mm256_testz_ps(__m256 A, __m256 return _mm256_testz_ps(A, B); } +int test_mm256_testz_si256(__m256 A, __m256 B) { + // CHECK-LABEL: test_mm256_testz_si256 + // CHECK: call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + return _mm256_testz_si256(A, B); +} + __m256 test_mm256_undefined_ps() { // CHECK-LABEL: @test_mm256_undefined_ps // CHECK: ret <8 x float> undef ___ cfe-commits mailing list cfe-commits@lists.llvm.org 
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20468: [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16
RKSimon created this revision. RKSimon added reviewers: mkuper, craig.topper, kromanova, spatel. RKSimon added a subscriber: cfe-commits. RKSimon set the repository for this revision to rL LLVM. Ensure _mm256_extract_epi8 and _mm256_extract_epi16 zero extend their i8/i16 result to i32. This matches _mm_extract_epi8 and _mm_extract_epi16. Fix for PR27594 Katya - I've updated the doxygen comments for _mm256_extract_epi8 and _mm256_extract_epi16, I guess this will need to be updated in Sony's intrinsics document for the next regeneration? Repository: rL LLVM http://reviews.llvm.org/D20468 Files: lib/Headers/avxintrin.h test/CodeGen/avx-builtins.c Index: test/CodeGen/avx-builtins.c === --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -314,21 +314,19 @@ return _mm256_dp_ps(A, B, 7); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi8(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi8 // CHECK: and i32 %{{.*}}, 31 // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}} - // CHECK: ext i8 %{{.*}} to i32 + // CHECK: zext i8 %{{.*}} to i32 return _mm256_extract_epi8(A, 32); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 // CHECK: and i32 %{{.*}}, 15 // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}} - // CHECK: ext i16 %{{.*}} to i32 + // CHECK: zext i16 %{{.*}} to i32 return _mm256_extract_epi16(A, 16); } Index: lib/Headers/avxintrin.h === --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -1875,13 +1875,13 @@ /// \param __imm ///An immediate integer operand with bits [3:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 16 bits of extended +/// \returns A 32-bit integer containing the extracted 16 bits of zero extended ///packed data. 
static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16(__m256i __a, const int __imm) { __v16hi __b = (__v16hi)__a; - return __b[__imm & 15]; + return (unsigned short)__b[__imm & 15]; } /// \brief Takes a [32 x i8] vector and returns the vector element value @@ -1897,13 +1897,13 @@ /// \param __imm ///An immediate integer operand with bits [4:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 8 bits of extended packed -///data. +/// \returns A 32-bit integer containing the extracted 8 bits of zero extended +///packed data. static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8(__m256i __a, const int __imm) { __v32qi __b = (__v32qi)__a; - return __b[__imm & 31]; + return (unsigned char)__b[__imm & 31]; } #ifdef __x86_64__ Index: test/CodeGen/avx-builtins.c === --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -314,21 +314,19 @@ return _mm256_dp_ps(A, B, 7); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi8(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi8 // CHECK: and i32 %{{.*}}, 31 // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}} - // CHECK: ext i8 %{{.*}} to i32 + // CHECK: zext i8 %{{.*}} to i32 return _mm256_extract_epi8(A, 32); } -// FIXME: ZEXT instead of SEXT int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 // CHECK: and i32 %{{.*}}, 15 // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}} - // CHECK: ext i16 %{{.*}} to i32 + // CHECK: zext i16 %{{.*}} to i32 return _mm256_extract_epi16(A, 16); } Index: lib/Headers/avxintrin.h === --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -1875,13 +1875,13 @@ /// \param __imm ///An immediate integer operand with bits [3:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 16 bits of extended +/// \returns A 32-bit integer containing the extracted 16 bits of zero extended ///packed data. 
static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16(__m256i __a, const int __imm) { __v16hi __b = (__v16hi)__a; - return __b[__imm & 15]; + return (unsigned short)__b[__imm & 15]; } /// \brief Takes a [32 x i8] vector and returns the vector element value @@ -1897,13 +1897,13 @@ /// \param __imm ///An immediate integer operand with bits [4:0] determining which vector ///element is extracted and returned. -/// \returns A 32-bit integer containing the extracted 8 bits of extended packed -///data. +/// \returns A 32-bit integer containing the extracted 8 bits of zero extended +///packed data. static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8(__m256i __a, const int
r270212 - [X86][AVX] Added _mm256_extract_epi64 test
Author: rksimon Date: Fri May 20 07:57:21 2016 New Revision: 270212 URL: http://llvm.org/viewvc/llvm-project?rev=270212&view=rev Log: [X86][AVX] Added _mm256_extract_epi64 test Modified: cfe/trunk/test/CodeGen/avx-builtins.c Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=270212&r1=270211&r2=270212&view=diff == --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Fri May 20 07:57:21 2016 @@ -339,6 +339,13 @@ int test_mm256_extract_epi32(__m256i A) return _mm256_extract_epi32(A, 8); } +long long test_mm256_extract_epi64(__m256i A) { + // CHECK-LABEL: test_mm256_extract_epi64 + // CHECK: and i32 %{{.*}}, 3 + // CHECK: extractelement <4 x i64> %{{.*}}, i32 %{{.*}} + return _mm256_extract_epi64(A, 5); +} + __m128d test_mm256_extractf128_pd(__m256d A) { // CHECK-LABEL: test_mm256_extractf128_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270210 - [X86][AVX] Full set of AVX intrinsics tests
Author: rksimon Date: Fri May 20 07:41:02 2016 New Revision: 270210 URL: http://llvm.org/viewvc/llvm-project?rev=270210=rev Log: [X86][AVX] Full set of AVX intrinsics tests llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll will be synced to this Modified: cfe/trunk/test/CodeGen/avx-builtins.c Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=270210=270209=270210=diff == --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Fri May 20 07:41:02 2016 @@ -1,84 +1,1297 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Werror | FileCheck %s +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s // Don't include mm_malloc.h, it's system specific. #define __MM_MALLOC_H -#include +#include -// -// Test LLVM IR codegen of shuffle instructions -// +// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll -__m256 test__mm256_loadu_ps(void* p) { - // CHECK: load <8 x float>, <8 x float>* %{{.*}}, align 1 - return _mm256_loadu_ps(p); +__m256d test_mm256_add_pd(__m256d A, __m256d B) { + // CHECK-LABEL: test_mm256_add_pd + // CHECK: fadd <4 x double> + return _mm256_add_pd(A, B); } -__m256d test__mm256_loadu_pd(void* p) { - // CHECK: load <4 x double>, <4 x double>* %{{.*}}, align 1 - return _mm256_loadu_pd(p); +__m256 test_mm256_add_ps(__m256 A, __m256 B) { + // CHECK-LABEL: test_mm256_add_ps + // CHECK: fadd <8 x float> + return _mm256_add_ps(A, B); } -__m256i test__mm256_loadu_si256(void* p) { - // CHECK: load <4 x i64>, <4 x i64>* %{{.+}}, align 1 - return _mm256_loadu_si256(p); +__m256d test_mm256_addsub_pd(__m256d A, __m256d B) { + // CHECK-LABEL: test_mm256_addsub_pd + // CHECK: call <4 x double> 
@llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + return _mm256_addsub_pd(A, B); } -int test_extract_epi32(__m256i __a) { - // CHECK-LABEL: @test_extract_epi32 - // CHECK: [[SHIFT1:%[^ ]+]] = and i32 %{{.*}}, 7 - // CHECK: extractelement <8 x i32> %{{.*}}, i32 [[SHIFT1]] - return _mm256_extract_epi32(__a, 8); +__m256 test_mm256_addsub_ps(__m256 A, __m256 B) { + // CHECK-LABEL: test_mm256_addsub_ps + // CHECK: call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + return _mm256_addsub_ps(A, B); } -int test_extract_epi16(__m256i __a) { - // CHECK-LABEL: @test_extract_epi16 - // CHECK: [[SHIFT2:%[^ ]+]] = and i32 %{{.*}}, 15 - // CHECK: extractelement <16 x i16> %{{.*}}, i32 [[SHIFT2]] - return _mm256_extract_epi16(__a, 16); +__m256d test_mm256_and_pd(__m256d A, __m256d B) { + // CHECK-LABEL: test_mm256_and_pd + // CHECK: and <4 x i64> + return _mm256_and_pd(A, B); } -int test_extract_epi8(__m256i __a) { - // CHECK-LABEL: @test_extract_epi8 - // CHECK: [[SHIFT3:%[^ ]+]] = and i32 %{{.*}}, 31 - // CHECK: extractelement <32 x i8> %{{.*}}, i32 [[SHIFT3]] - return _mm256_extract_epi8(__a, 32); +__m256 test_mm256_and_ps(__m256 A, __m256 B) { + // CHECK-LABEL: test_mm256_and_ps + // CHECK: and <8 x i32> + return _mm256_and_ps(A, B); } -__m256d test_256_blend_pd(__m256d __a, __m256d __b) { - // CHECK-LABEL: @test_256_blend_pd +__m256d test_mm256_andnot_pd(__m256d A, __m256d B) { + // CHECK-LABEL: test_mm256_andnot_pd + // CHECK: xor <4 x i64> %{{.*}}, + // CHECK: and <4 x i64> + return _mm256_andnot_pd(A, B); +} + +__m256 test_mm256_andnot_ps(__m256 A, __m256 B) { + // CHECK-LABEL: test_mm256_andnot_ps + // CHECK: xor <8 x i32> %{{.*}}, + // CHECK: and <8 x i32> + return _mm256_andnot_ps(A, B); +} + +__m256d test_mm256_blend_pd(__m256d A, __m256d B) { + // CHECK-LABEL: test_mm256_blend_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> - return _mm256_blend_pd(__a, __b, 0x35); + return 
_mm256_blend_pd(A, B, 0x35); } -__m256 test_256_blend_ps(__m256 __a, __m256 __b) { - // CHECK-LABEL: @test_256_blend_ps +__m256 test_mm256_blend_ps(__m256 A, __m256 B) { + // CHECK-LABEL: test_mm256_blend_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> - return _mm256_blend_ps(__a, __b, 0x35); + return _mm256_blend_ps(A, B, 0x35); +} + +__m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) { + // CHECK-LABEL: test_mm256_blendv_pd + // CHECK: call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + return _mm256_blendv_pd(V1, V2, V3); +} + +__m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) { + // CHECK-LABEL: test_mm256_blendv_ps + // CHECK: call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>
r270079 - [X86][SSE2] Fixed shuffle of results in _mm_cmpnge_sd/_mm_cmpngt_sd tests
Author: rksimon Date: Thu May 19 11:48:59 2016 New Revision: 270079 URL: http://llvm.org/viewvc/llvm-project?rev=270079&view=rev Log: [X86][SSE2] Fixed shuffle of results in _mm_cmpnge_sd/_mm_cmpngt_sd tests Modified: cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/test/CodeGen/sse2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=270079&r1=270078&r2=270079&view=diff == --- cfe/trunk/test/CodeGen/sse2-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse2-builtins.c Thu May 19 11:48:59 2016 @@ -306,6 +306,10 @@ __m128d test_mm_cmpnge_pd(__m128d A, __m __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_cmpnge_sd // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 1 + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 return _mm_cmpnge_sd(A, B); } @@ -318,6 +322,10 @@ __m128d test_mm_cmpngt_pd(__m128d A, __m __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_cmpngt_sd // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 1 + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 return _mm_cmpngt_sd(A, B); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270043 - [X86][SSE2] Added _mm_move_* tests
Author: rksimon Date: Thu May 19 06:18:49 2016 New Revision: 270043 URL: http://llvm.org/viewvc/llvm-project?rev=270043&view=rev Log: [X86][SSE2] Added _mm_move_* tests Modified: cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/test/CodeGen/sse2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=270043&r1=270042&r2=270043&view=diff == --- cfe/trunk/test/CodeGen/sse2-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse2-builtins.c Thu May 19 06:18:49 2016 @@ -721,6 +721,21 @@ __m128d test_mm_min_sd(__m128d A, __m128 return _mm_min_sd(A, B); } +__m128i test_mm_move_epi64(__m128i A) { + // CHECK-LABEL: test_mm_move_epi64 + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> + return _mm_move_epi64(A); +} + +__m128d test_mm_move_sd(__m128d A, __m128d B) { + // CHECK-LABEL: test_mm_move_sd + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 1 + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 + return _mm_move_sd(A, B); +} + int test_mm_movemask_epi8(__m128i A) { // CHECK-LABEL: test_mm_movemask_epi8 // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}}) ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r270042 - [X86][SSE2] Added _mm_cast* and _mm_set* tests
Author: rksimon Date: Thu May 19 06:03:48 2016 New Revision: 270042 URL: http://llvm.org/viewvc/llvm-project?rev=270042=rev Log: [X86][SSE2] Added _mm_cast* and _mm_set* tests Modified: cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/test/CodeGen/sse2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=270042=270041=270042=diff == --- cfe/trunk/test/CodeGen/sse2-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse2-builtins.c Thu May 19 06:03:48 2016 @@ -121,6 +121,42 @@ __m128i test_mm_bsrli_si128(__m128i A) { return _mm_bsrli_si128(A, 5); } +__m128 test_mm_castpd_ps(__m128d A) { + // CHECK-LABEL: test_mm_castpd_ps + // CHECK: bitcast <2 x double> %{{.*}} to <4 x float> + return _mm_castpd_ps(A); +} + +__m128i test_mm_castpd_si128(__m128d A) { + // CHECK-LABEL: test_mm_castpd_si128 + // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64> + return _mm_castpd_si128(A); +} + +__m128d test_mm_castps_pd(__m128 A) { + // CHECK-LABEL: test_mm_castps_pd + // CHECK: bitcast <4 x float> %{{.*}} to <2 x double> + return _mm_castps_pd(A); +} + +__m128i test_mm_castps_si128(__m128 A) { + // CHECK-LABEL: test_mm_castps_si128 + // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64> + return _mm_castps_si128(A); +} + +__m128d test_mm_castsi128_pd(__m128i A) { + // CHECK-LABEL: test_mm_castsi128_pd + // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double> + return _mm_castsi128_pd(A); +} + +__m128 test_mm_castsi128_ps(__m128i A) { + // CHECK-LABEL: test_mm_castsi128_ps + // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float> + return _mm_castsi128_ps(A); +} + void test_mm_clflush(void* A) { // CHECK-LABEL: test_mm_clflush // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}}) @@ -778,6 +814,206 @@ __m128i test_mm_sad_epu8(__m128i A, __m1 return _mm_sad_epu8(A, B); } +__m128i test_mm_set_epi8(char A, char B, char C, char D, + char E, char F, char G, char H, + char I, char J, char K, char L, + char M, char N, char O, char P) { + // 
CHECK-LABEL: test_mm_set_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P); +} + +__m128i test_mm_set_epi16(short A, short B, short C, short D, + short E, short F, short G, short H) { + // CHECK-LABEL: test_mm_set_epi16 + // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 + // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 + return _mm_set_epi16(A, B, C, D, E, F, G, H); +} + +__m128i test_mm_set_epi32(int A, int B, int C, int D) { + // CHECK-LABEL: test_mm_set_epi32 + // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 + // CHECK: 
insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 + return _mm_set_epi32(A, B, C, D); +} + +__m128i test_mm_set_epi64(__m64 A, __m64 B) { + // CHECK-LABEL: test_mm_set_epi64 + // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 + // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 + return _mm_set_epi64(A, B); +} + +__m128i test_mm_set_epi64x(long long A, long long B) { + // CHECK-LABEL: test_mm_set_epi64x + // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 + // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 + return _mm_set_epi64x(A, B); +} + +__m128d test_mm_set_pd(double A, double B)
r270034 - [X86][SSE2] Sync with llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
Author: rksimon Date: Thu May 19 04:52:59 2016 New Revision: 270034 URL: http://llvm.org/viewvc/llvm-project?rev=270034=rev Log: [X86][SSE2] Sync with llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll Modified: cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/test/CodeGen/sse2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=270034=270033=270034=diff == --- cfe/trunk/test/CodeGen/sse2-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse2-builtins.c Thu May 19 04:52:59 2016 @@ -6,6 +6,8 @@ #include +// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll + __m128i test_mm_add_epi8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_add_epi8 // CHECK: add <16 x i8> @@ -38,31 +40,34 @@ __m128d test_mm_add_pd(__m128d A, __m128 __m128d test_mm_add_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_add_sd + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fadd double + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_add_sd(A, B); } __m128i test_mm_adds_epi8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_adds_epi8 - // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b + // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_adds_epi8(A, B); } __m128i test_mm_adds_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_adds_epi16 - // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w + // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_adds_epi16(A, B); } __m128i test_mm_adds_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_adds_epu8 - // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b + // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_adds_epu8(A, B); } __m128i test_mm_adds_epu16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_adds_epu16 - // CHECK: call <8 x 
i16> @llvm.x86.sse2.paddus.w + // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_adds_epu16(A, B); } @@ -78,15 +83,29 @@ __m128i test_mm_and_si128(__m128i A, __m return _mm_and_si128(A, B); } +__m128d test_mm_andnot_pd(__m128d A, __m128d B) { + // CHECK-LABEL: test_mm_andnot_pd + // CHECK: xor <4 x i32> %{{.*}}, + // CHECK: and <4 x i32> + return _mm_andnot_pd(A, B); +} + +__m128i test_mm_andnot_si128(__m128i A, __m128i B) { + // CHECK-LABEL: test_mm_andnot_si128 + // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: and <2 x i64> + return _mm_andnot_si128(A, B); +} + __m128i test_mm_avg_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu8 - // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b + // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_avg_epu8(A, B); } __m128i test_mm_avg_epu16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu16 - // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w + // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_avg_epu16(A, B); } @@ -147,6 +166,10 @@ __m128d test_mm_cmpge_pd(__m128d A, __m1 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_cmpge_sd // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 1 + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 return _mm_cmpge_sd(A, B); } @@ -177,6 +200,10 @@ __m128d test_mm_cmpgt_pd(__m128d A, __m1 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_cmpgt_sd // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 1 + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 return _mm_cmpgt_sd(A, B); } @@ -308,73 +335,73 @@ __m128d test_mm_cmpunord_sd(__m128d A, _ int test_mm_comieq_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_comieq_sd - // CHECK: call i32 @llvm.x86.sse2.comieq.sd + // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_comieq_sd(A, B); } int test_mm_comige_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_comige_sd - // CHECK: call i32 @llvm.x86.sse2.comige.sd + // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_comige_sd(A, B); } int