r287114 - Remove duplicate condition (PR30648). NFCI.

2016-11-16 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed Nov 16 10:11:08 2016
New Revision: 287114

URL: http://llvm.org/viewvc/llvm-project?rev=287114&view=rev
Log:
Remove duplicate condition (PR30648). NFCI.

We only need to check that the bitstream entry is a Record.

Modified:
cfe/trunk/lib/Serialization/ASTReader.cpp

Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=287114&r1=287113&r2=287114&view=diff
==
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Wed Nov 16 10:11:08 2016
@@ -4189,8 +4189,7 @@ static ASTFileSignature readASTFileSigna
   ASTReader::RecordData Record;
   while (true) {
 llvm::BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
-if (Entry.Kind == llvm::BitstreamEntry::EndBlock ||
-Entry.Kind != llvm::BitstreamEntry::Record)
+if (Entry.Kind != llvm::BitstreamEntry::Record)
   return 0;
 
 Record.clear();


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D26686: [X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with generic IR

2016-11-16 Thread Simon Pilgrim via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL287088: [X86][AVX512] Replace lossless i32/u32 to f64 
conversion intrinsics with… (authored by RKSimon).

Changed prior to commit:
  https://reviews.llvm.org/D26686?vs=78038&id=78146#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D26686

Files:
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/lib/Headers/avx512fintrin.h
  cfe/trunk/lib/Headers/avx512vlintrin.h
  cfe/trunk/test/CodeGen/avx512f-builtins.c
  cfe/trunk/test/CodeGen/avx512vl-builtins.c

Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def
===
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def
@@ -961,8 +961,6 @@
 TARGET_BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd512_mask, "V8dV8iV8dUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2pd512_mask, "V8dV8iV8dUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "", "avx512f")
@@ -1165,8 +1163,6 @@
 TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd128_mask, "V2dV4iV2dUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd256_mask, "V4dV4iV4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl")
@@ -1189,8 +1185,6 @@
 TARGET_BUILTIN(__builtin_ia32_cvttps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2pd128_mask, "V2dV4iV2dUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2pd256_mask, "V4dV4iV4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtudq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "", "avx512vl")
Index: cfe/trunk/test/CodeGen/avx512f-builtins.c
===
--- cfe/trunk/test/CodeGen/avx512f-builtins.c
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c
@@ -6949,33 +6949,43 @@
   return _mm512_maskz_cvtepu32_ps (__U,__A);
 }
 
+__m512d test_mm512_cvtepi32_pd (__m256i __A)
+{
+  // CHECK-LABEL: @test_mm512_cvtepi32_pd
+  // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double>
+  return _mm512_cvtepi32_pd (__A);
+}
+
 __m512d test_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_cvtepi32_pd 
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.512
+  // CHECK-LABEL: @test_mm512_mask_cvtepi32_pd
+  // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double>
+  // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}}
   return _mm512_mask_cvtepi32_pd (__W,__U,__A);
 }
 
 __m512d test_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
 {
-  // CHECK-LABEL: @test_mm512_maskz_cvtepi32_pd 
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.512
+  // CHECK-LABEL: @test_mm512_maskz_cvtepi32_pd
+  // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double>
+  // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}}
   return _mm512_maskz_cvtepi32_pd (__U,__A);
 }
 
 __m512d test_mm512_cvtepi32lo_pd (__m512i __A)
 {
   // CHECK-LABEL: @test_mm512_cvtepi32lo_pd
   // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> 
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.512
+  // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double>
   return _mm512_cvtepi32lo_pd (__A);
 }
 
 __m512d test_mm512_mask_cvtepi32lo_pd (__m512d __W, __mmask8 __U, __m512i __A)
 {
   // CHECK-LABEL: @test_mm512_mask_cvtepi32lo_pd
   // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> 
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.512
+  // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double>
+  // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}}
   return _mm512_mask_cvtepi32lo_pd (__W, __U, __A);
 }
 
@@ -7000,33 +7010,43 @@
   return _mm512_maskz_cvtepi32_ps (__U,__A);
 }
 
+__m512d test_mm512_cvtepu32_pd(__m256i __A)
+{
+  // 

r287088 - [X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with generic IR

2016-11-16 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed Nov 16 03:27:40 2016
New Revision: 287088

URL: http://llvm.org/viewvc/llvm-project?rev=287088&view=rev
Log:
[X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with 
generic IR

Both the (V)CVTDQ2PD (i32 to f64) and (V)CVTUDQ2PD (u32 to f64) conversion 
instructions are lossless and can be safely represented as generic 
__builtin_convertvector calls instead of x86 intrinsics without affecting final 
codegen.

This patch removes the clang builtins and their use in the headers - a future 
patch will deal with removing the llvm intrinsics.

This is an extension patch to D20528 which dealt with the equivalent sse/avx 
cases.

Differential Revision: https://reviews.llvm.org/D26686

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=287088&r1=287087&r2=287088&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Nov 16 03:27:40 2016
@@ -961,8 +961,6 @@ TARGET_BUILTIN(__builtin_ia32_maxps512_m
 TARGET_BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd512_mask, "V8dV8iV8dUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2pd512_mask, "V8dV8iV8dUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "", 
"avx512f")
@@ -1165,8 +1163,6 @@ TARGET_BUILTIN(__builtin_ia32_compressst
 TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd128_mask, "V2dV4iV2dUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd256_mask, "V4dV4iV4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl")
@@ -1189,8 +1185,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttps2dq1
 TARGET_BUILTIN(__builtin_ia32_cvttps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2pd128_mask, "V2dV4iV2dUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2pd256_mask, "V4dV4iV4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtudq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "", "avx512vl")

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=287088&r1=287087&r2=287088&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Nov 16 03:27:40 2016
@@ -3740,26 +3740,23 @@ _mm512_maskz_cvtepu32_ps (__mmask16 __U,
 static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_cvtepi32_pd(__m256i __A)
 {
-  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
-(__v8df)
-_mm512_setzero_pd (),
-(__mmask8) -1);
+  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
 {
-  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
-(__v8df) __W,
-(__mmask8) __U);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+  (__v8df)_mm512_cvtepi32_pd(__A),
+  (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
 {
-  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
-(__v8df) _mm512_setzero_pd (),
-(__mmask8) __U);
+  return 

[PATCH] D26686: [X86][AVX512] Replace lossless i32/u32 to f64 conversion intrinsics with generic IR

2016-11-15 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: craig.topper, igorb, delena.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

Both the (V)CVTDQ2PD (i32 to f64) and (V)CVTUDQ2PD (u32 to f64) conversion 
instructions are lossless and can be safely represented as generic 
__builtin_convertvector calls instead of x86 intrinsics without affecting final 
codegen.

This patch removes the clang builtins and their use in the headers - a future 
patch will deal with removing the llvm intrinsics.

This is an extension patch to https://reviews.llvm.org/D20528 which dealt with 
the equivalent sse/avx cases.


Repository:
  rL LLVM

https://reviews.llvm.org/D26686

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avx512fintrin.h
  lib/Headers/avx512vlintrin.h
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/avx512vl-builtins.c

Index: test/CodeGen/avx512vl-builtins.c
===
--- test/CodeGen/avx512vl-builtins.c
+++ test/CodeGen/avx512vl-builtins.c
@@ -1737,23 +1737,29 @@
 }
 __m128d test_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_cvtepi32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.128
-  return _mm_mask_cvtepi32_pd(__W,__U,__A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> 
+  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
+  // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}}
+  return _mm_mask_cvtepi32_pd(__W,__U,__A);
 }
 __m128d test_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_cvtepi32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.128
-  return _mm_maskz_cvtepi32_pd(__U,__A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> 
+  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
+  // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}}
+  return _mm_maskz_cvtepi32_pd(__U,__A);
 }
 __m256d test_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm256_mask_cvtepi32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.256
-  return _mm256_mask_cvtepi32_pd(__W,__U,__A); 
+  // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double>
+  // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}}
+  return _mm256_mask_cvtepi32_pd(__W,__U,__A);
 }
 __m256d test_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtepi32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.256
-  return _mm256_maskz_cvtepi32_pd(__U,__A); 
+  // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double>
+  // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}}
+  return _mm256_maskz_cvtepi32_pd(__U,__A);
 }
 __m128 test_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_cvtepi32_ps
@@ -2017,33 +2023,40 @@
 }
 __m128d test_mm_cvtepu32_pd(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepu32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128
-  return _mm_cvtepu32_pd(__A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> 
+  // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double>
+  return _mm_cvtepu32_pd(__A);
 }
 __m128d test_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_cvtepu32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128
-  return _mm_mask_cvtepu32_pd(__W,__U,__A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> 
+  // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double>
+  // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}}
+  return _mm_mask_cvtepu32_pd(__W,__U,__A);
 }
 __m128d test_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_cvtepu32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128
-  return _mm_maskz_cvtepu32_pd(__U,__A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> 
+  // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double>
+  // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}}
+  return _mm_maskz_cvtepu32_pd(__U,__A);
 }
 __m256d test_mm256_cvtepu32_pd(__m128i __A) {
   // CHECK-LABEL: @test_mm256_cvtepu32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.256
-  return _mm256_cvtepu32_pd(__A); 
+  // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double>
+  return _mm256_cvtepu32_pd(__A);
 }
 __m256d test_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm256_mask_cvtepu32_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.256
-  return _mm256_mask_cvtepu32_pd(__W,__U,__A); 
+  // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double>
+  // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}}
+  return _mm256_mask_cvtepu32_pd(__W,__U,__A);
 }
 __m256d test_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: 

r286996 - Fixed spelling in comments. NFCI.

2016-11-15 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Tue Nov 15 12:28:07 2016
New Revision: 286996

URL: http://llvm.org/viewvc/llvm-project?rev=286996&view=rev
Log:
Fixed spelling in comments. NFCI.

Modified:
cfe/trunk/include/clang/AST/DeclObjC.h
cfe/trunk/include/clang/Basic/TargetInfo.h
cfe/trunk/lib/Sema/SemaExprCXX.cpp

Modified: cfe/trunk/include/clang/AST/DeclObjC.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/DeclObjC.h?rev=286996&r1=286995&r2=286996&view=diff
==
--- cfe/trunk/include/clang/AST/DeclObjC.h (original)
+++ cfe/trunk/include/clang/AST/DeclObjC.h Tue Nov 15 12:28:07 2016
@@ -394,7 +394,7 @@ public:
 
   /// createImplicitParams - Used to lazily create the self and cmd
   /// implict parameters. This must be called prior to using getSelfDecl()
-  /// or getCmdDecl(). The call is ignored if the implicit paramters
+  /// or getCmdDecl(). The call is ignored if the implicit parameters
   /// have already been created.
   void createImplicitParams(ASTContext , const ObjCInterfaceDecl *ID);
 

Modified: cfe/trunk/include/clang/Basic/TargetInfo.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/TargetInfo.h?rev=286996&r1=286995&r2=286996&view=diff
==
--- cfe/trunk/include/clang/Basic/TargetInfo.h (original)
+++ cfe/trunk/include/clang/Basic/TargetInfo.h Tue Nov 15 12:28:07 2016
@@ -989,7 +989,7 @@ public:
 return false;
   }
 
-  /// \brief Whether target allows to overalign ABI-specified prefered 
alignment
+  /// \brief Whether target allows to overalign ABI-specified preferred 
alignment
   virtual bool allowsLargerPreferedTypeAlignment() const { return true; }
 
   /// \brief Set supported OpenCL extensions and optional core features.

Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=286996&r1=286995&r2=286996&view=diff
==
--- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Tue Nov 15 12:28:07 2016
@@ -1150,7 +1150,7 @@ bool Sema::CheckCXXThisCapture(SourceLoc
 
   // In the loop below, respect the ByCopy flag only for the closure requesting
   // the capture (i.e. first iteration through the loop below).  Ignore it for
-  // all enclosing closure's upto NumCapturingClosures (since they must be
+  // all enclosing closure's up to NumCapturingClosures (since they must be
   // implicitly capturing the *enclosing  object* by reference (see loop
   // above)).
   assert((!ByCopy ||


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D20359: [LLVM][AVX512][Intrinsics] Convert AVX non-temporal store builtins to LLVM-native IR.

2016-11-15 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

Close this? It appears to have been committed, including the auto upgrade tests 
requested by Craig.


https://reviews.llvm.org/D20359



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r286595 - [X86] Merge (near) duplicate scalar non-temporal store code. NFCI.

2016-11-11 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri Nov 11 08:38:34 2016
New Revision: 286595

URL: http://llvm.org/viewvc/llvm-project?rev=286595&view=rev
Log:
[X86] Merge (near) duplicate scalar non-temporal store code. NFCI.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=286595&r1=286594&r2=286595&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Nov 11 08:38:34 2016
@@ -7514,36 +7514,26 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   }
 
   case X86::BI__builtin_ia32_movnti:
-  case X86::BI__builtin_ia32_movnti64: {
-llvm::MDNode *Node = llvm::MDNode::get(
-getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
-
-// Convert the type of the pointer to a pointer to the stored type.
-Value *BC = Builder.CreateBitCast(Ops[0],
-
llvm::PointerType::getUnqual(Ops[1]->getType()),
-  "cast");
-StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
-SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
-
-// No alignment for scalar intrinsic store.
-SI->setAlignment(1);
-return SI;
-  }
+  case X86::BI__builtin_ia32_movnti64:
   case X86::BI__builtin_ia32_movntsd:
   case X86::BI__builtin_ia32_movntss: {
 llvm::MDNode *Node = llvm::MDNode::get(
 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
 
+Value *Ptr = Ops[0];
+Value *Src = Ops[1];
+
 // Extract the 0'th element of the source vector.
-Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
+if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
+BuiltinID == X86::BI__builtin_ia32_movntss)
+  Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
 
 // Convert the type of the pointer to a pointer to the stored type.
-Value *BC = Builder.CreateBitCast(Ops[0],
-llvm::PointerType::getUnqual(Scl->getType()),
-  "cast");
+Value *BC = Builder.CreateBitCast(
+Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
 
 // Unaligned nontemporal store of the scalar value.
-StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
+StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
 SI->setAlignment(1);
 return SI;


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] r286449 - Fix -Wdocumentation warning

2016-11-10 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu Nov 10 07:54:39 2016
New Revision: 286449

URL: http://llvm.org/viewvc/llvm-project?rev=286449&view=rev
Log:
Fix -Wdocumentation warning

Modified:
clang-tools-extra/trunk/clang-move/ClangMove.h

Modified: clang-tools-extra/trunk/clang-move/ClangMove.h
URL: 
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-move/ClangMove.h?rev=286449&r1=286448&r2=286449&view=diff
==
--- clang-tools-extra/trunk/clang-move/ClangMove.h (original)
+++ clang-tools-extra/trunk/clang-move/ClangMove.h Thu Nov 10 07:54:39 2016
@@ -72,7 +72,7 @@ public:
   /// \param SearchPath The search path which was used to find the 
IncludeHeader
   /// in the file system. It can be a relative path or an absolute path.
   /// \param FileName The name of file where the IncludeHeader comes from.
-  /// \param IncludeRange The source range for the written file name in 
#include
+  /// \param IncludeFilenameRange The source range for the written file name 
in #include
   ///  (i.e. "old.h" for #include "old.h") in old.cc.
   /// \param SM The SourceManager.
   void addIncludes(llvm::StringRef IncludeHeader, bool IsAngled,


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r285390 - Fix MSVC "not all control paths return a value" warning

2016-10-28 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri Oct 28 05:09:35 2016
New Revision: 285390

URL: http://llvm.org/viewvc/llvm-project?rev=285390&view=rev
Log:
Fix MSVC "not all control paths return a value" warning

Add unreachable after enum switch statement

Modified:
cfe/trunk/lib/Driver/Action.cpp

Modified: cfe/trunk/lib/Driver/Action.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=285390&r1=285389&r2=285390&view=diff
==
--- cfe/trunk/lib/Driver/Action.cpp (original)
+++ cfe/trunk/lib/Driver/Action.cpp Fri Oct 28 05:09:35 2016
@@ -146,6 +146,8 @@ llvm::StringRef Action::GetOffloadKindNa
 
 // TODO: Add other programming models here.
   }
+
+  llvm_unreachable("invalid offload kind");
 }
 
 void InputAction::anchor() {}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: r285281 - Fix MSVC warning about missing 'this' from lambda default capture mode

2016-10-27 Thread Simon Pilgrim via cfe-commits

> On 27 Oct 2016, at 13:25, Erik Verbruggen <erik.verbrug...@me.com> wrote:
> 
> Eh? Preprocessor::IsFileLexer(const IncludeStackInfo &) is static and doesn't 
> need 'this'... Apparently MSVC mistakingly confuses that method with the 
> non-static Preprocessor::IsFileLexer() method?

You’re not kidding, its incredibly annoying. I can add a comment explaining why 
its there if you wish?
Simon

>> On 27 Oct 2016, at 12:51, Simon Pilgrim via cfe-commits 
>> <cfe-commits@lists.llvm.org> wrote:
>> 
>> Author: rksimon
>> Date: Thu Oct 27 05:51:29 2016
>> New Revision: 285281
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=285281=rev
>> Log:
>> Fix MSVC warning about missing 'this' from lambda default capture mode
>> 
>> Modified:
>>   cfe/trunk/lib/Lex/PPLexerChange.cpp
>> 
>> Modified: cfe/trunk/lib/Lex/PPLexerChange.cpp
>> URL: 
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPLexerChange.cpp?rev=285281=285280=285281=diff
>> ==
>> --- cfe/trunk/lib/Lex/PPLexerChange.cpp (original)
>> +++ cfe/trunk/lib/Lex/PPLexerChange.cpp Thu Oct 27 05:51:29 2016
>> @@ -41,7 +41,7 @@ bool Preprocessor::isInPrimaryFile() con
>>  assert(IsFileLexer(IncludeMacroStack[0]) &&
>> "Top level include stack isn't our primary lexer?");
>>  return std::none_of(IncludeMacroStack.begin() + 1, IncludeMacroStack.end(),
>> -  [](const IncludeStackInfo ) -> bool {
>> +  [this](const IncludeStackInfo ) -> bool {
>>return IsFileLexer(ISI);
>>  });
>> }
>> 
>> 
>> ___
>> cfe-commits mailing list
>> cfe-commits@lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
> 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r285281 - Fix MSVC warning about missing 'this' from lambda default capture mode

2016-10-27 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu Oct 27 05:51:29 2016
New Revision: 285281

URL: http://llvm.org/viewvc/llvm-project?rev=285281&view=rev
Log:
Fix MSVC warning about missing 'this' from lambda default capture mode

Modified:
cfe/trunk/lib/Lex/PPLexerChange.cpp

Modified: cfe/trunk/lib/Lex/PPLexerChange.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPLexerChange.cpp?rev=285281&r1=285280&r2=285281&view=diff
==
--- cfe/trunk/lib/Lex/PPLexerChange.cpp (original)
+++ cfe/trunk/lib/Lex/PPLexerChange.cpp Thu Oct 27 05:51:29 2016
@@ -41,7 +41,7 @@ bool Preprocessor::isInPrimaryFile() con
   assert(IsFileLexer(IncludeMacroStack[0]) &&
  "Top level include stack isn't our primary lexer?");
   return std::none_of(IncludeMacroStack.begin() + 1, IncludeMacroStack.end(),
-  [](const IncludeStackInfo ) -> bool {
+  [this](const IncludeStackInfo ) -> bool {
 return IsFileLexer(ISI);
   });
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r285067 - Fix MSVC unused variable warning.

2016-10-25 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Tue Oct 25 07:59:15 2016
New Revision: 285067

URL: http://llvm.org/viewvc/llvm-project?rev=285067&view=rev
Log:
Fix MSVC unused variable warning.

LLVM_ATTRIBUTE_UNUSED doesn't work for non-gcc style compilers.

Modified:
cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp

Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp?rev=285067&r1=285066&r2=285067&view=diff
==
--- cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp 
(original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp Tue 
Oct 25 07:59:15 2016
@@ -464,10 +464,11 @@ void StdLibraryFunctionsChecker::initFun
   QualType SSizeTy = ACtx.getIntTypeForBitwidth(ACtx.getTypeSize(SizeTy), 
true);
 
   // Don't worry about truncation here, it'd be cast back to SIZE_MAX when 
used.
-  LLVM_ATTRIBUTE_UNUSED int64_t SizeMax =
+  int64_t SizeMax =
   BVF.getMaxValue(SizeTy).getLimitedValue();
   int64_t SSizeMax =
 BVF.getMaxValue(SSizeTy).getLimitedValue();
+  (void)SizeMax;
 
   // We are finally ready to define specifications for all supported functions.
   //


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] r284476 - Fix signed/unsigned comparison warnings

2016-10-18 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Tue Oct 18 08:15:31 2016
New Revision: 284476

URL: http://llvm.org/viewvc/llvm-project?rev=284476&view=rev
Log:
Fix signed/unsigned comparison warnings

Modified:
clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp
clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp

Modified: clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp
URL: 
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp?rev=284476&r1=284475&r2=284476&view=diff
==
--- clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp 
(original)
+++ clang-tools-extra/trunk/unittests/clang-tidy/NamespaceAliaserTest.cpp Tue 
Oct 18 08:15:31 2016
@@ -51,7 +51,7 @@ private:
 };
 
 template 
-std::string runChecker(StringRef Code, int ExpectedWarningCount) {
+std::string runChecker(StringRef Code, unsigned ExpectedWarningCount) {
   std::map AdditionalFileContents = {{"foo.h",
 "namespace foo {\n"
 "namespace bar {\n"

Modified: clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp
URL: 
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp?rev=284476&r1=284475&r2=284476&view=diff
==
--- clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp 
(original)
+++ clang-tools-extra/trunk/unittests/clang-tidy/UsingInserterTest.cpp Tue Oct 
18 08:15:31 2016
@@ -53,7 +53,7 @@ private:
 };
 
 template 
-std::string runChecker(StringRef Code, int ExpectedWarningCount) {
+std::string runChecker(StringRef Code, unsigned ExpectedWarningCount) {
   std::map AdditionalFileContents = {{"foo.h",
 "namespace foo {\n"
 "namespace bar {\n"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r283549 - Wdocumentation fix

2016-10-07 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri Oct  7 08:25:41 2016
New Revision: 283549

URL: http://llvm.org/viewvc/llvm-project?rev=283549&view=rev
Log:
Wdocumentation fix

Modified:
cfe/trunk/include/clang/Sema/Sema.h

Modified: cfe/trunk/include/clang/Sema/Sema.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=283549&r1=283548&r2=283549&view=diff
==
--- cfe/trunk/include/clang/Sema/Sema.h (original)
+++ cfe/trunk/include/clang/Sema/Sema.h Fri Oct  7 08:25:41 2016
@@ -8837,16 +8837,16 @@ public:
 
   /// Check assignment constraints for an assignment of RHS to LHSType.
   ///
-  /// \brief LHSType The destination type for the assignment.
-  /// \brief RHS The source expression for the assignment.
-  /// \brief Diagnose If \c true, diagnostics may be produced when checking
+  /// \param LHSType The destination type for the assignment.
+  /// \param RHS The source expression for the assignment.
+  /// \param Diagnose If \c true, diagnostics may be produced when checking
   ///for assignability. If a diagnostic is produced, \p RHS will be
   ///set to ExprError(). Note that this function may still return
   ///without producing a diagnostic, even for an invalid assignment.
-  /// \brief DiagnoseCFAudited If \c true, the target is a function parameter
+  /// \param DiagnoseCFAudited If \c true, the target is a function parameter
   ///in an audited Core Foundation API and does not need to be checked
   ///for ARC retain issues.
-  /// \brief ConvertRHS If \c true, \p RHS will be updated to model the
+  /// \param ConvertRHS If \c true, \p RHS will be updated to model the
   ///conversions necessary to perform the assignment. If \c false,
   ///\p Diagnose must also be \c false.
   AssignConvertType CheckSingleAssignmentConstraints(


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r283106 - Wdocumentation fix

2016-10-03 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon Oct  3 07:37:08 2016
New Revision: 283106

URL: http://llvm.org/viewvc/llvm-project?rev=283106&view=rev
Log:
Wdocumentation fix

Modified:
cfe/trunk/lib/Analysis/CloneDetection.cpp

Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=283106&r1=283105&r2=283106&view=diff
==
--- cfe/trunk/lib/Analysis/CloneDetection.cpp (original)
+++ cfe/trunk/lib/Analysis/CloneDetection.cpp Mon Oct  3 07:37:08 2016
@@ -110,7 +110,7 @@ class VariablePattern {
 
   /// \brief Adds a new variable referenced to this pattern.
   /// \param VarDecl The declaration of the variable that is referenced.
-  /// \param Range The SourceRange where this variable is referenced.
+  /// \param Mention The statement in the code where the variable was 
referenced.
   void addVariableOccurence(const VarDecl *VarDecl, const Stmt *Mention) {
 // First check if we already reference this variable
 for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r282858 - Strip trailing whitespace (NFCI)

2016-09-30 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri Sep 30 09:25:09 2016
New Revision: 282858

URL: http://llvm.org/viewvc/llvm-project?rev=282858&view=rev
Log:
Strip trailing whitespace (NFCI)

Modified:
cfe/trunk/lib/Sema/SemaExprCXX.cpp

Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=282858&r1=282857&r2=282858&view=diff
==
--- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Fri Sep 30 09:25:09 2016
@@ -292,7 +292,7 @@ ParsedType Sema::getDestructorName(Sourc
   if (isDependent) {
 // We didn't find our type, but that's okay: it's dependent
 // anyway.
-
+
 // FIXME: What if we have no nested-name-specifier?
 QualType T = CheckTypenameType(ETK_None, SourceLocation(),
SS.getWithLocInContext(Context),
@@ -326,14 +326,14 @@ ParsedType Sema::getDestructorName(Sourc
 ParsedType Sema::getDestructorType(const DeclSpec& DS, ParsedType ObjectType) {
 if (DS.getTypeSpecType() == DeclSpec::TST_error || !ObjectType)
   return nullptr;
-assert(DS.getTypeSpecType() == DeclSpec::TST_decltype 
+assert(DS.getTypeSpecType() == DeclSpec::TST_decltype
&& "only get destructor types from declspecs");
 QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
 QualType SearchType = GetTypeFromParser(ObjectType);
 if (SearchType->isDependentType() || 
Context.hasSameUnqualifiedType(SearchType, T)) {
   return ParsedType::make(T);
 }
-  
+
 Diag(DS.getTypeSpecTypeLoc(), diag::err_destructor_expr_type_mismatch)
   << T << SearchType;
 return nullptr;
@@ -662,7 +662,7 @@ Sema::ActOnCXXThrow(Scope *S, SourceLoca
   IsThrownVarInScope = true;
   break;
 }
-
+
 if (S->getFlags() &
 (Scope::FnScope | Scope::ClassScope | Scope::BlockScope |
  Scope::FunctionPrototypeScope | Scope::ObjCMethodScope |
@@ -672,11 +672,11 @@ Sema::ActOnCXXThrow(Scope *S, SourceLoca
 }
   }
   }
-  
+
   return BuildCXXThrow(OpLoc, Ex, IsThrownVarInScope);
 }
 
-ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr *Ex, 
+ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr *Ex,
bool IsThrownVarInScope) {
   // Don't report an error if 'throw' is used in system headers.
   if (!getLangOpts().CXXExceptions &&
@@ -907,10 +907,10 @@ static QualType adjustCVQualifiersForCXX
I-- && isa(FunctionScopes[I]);
CurDC = getLambdaAwareParentOfDeclContext(CurDC)) {
 CurLSI = cast(FunctionScopes[I]);
-
-if (!CurLSI->isCXXThisCaptured()) 
+
+if (!CurLSI->isCXXThisCaptured())
 continue;
-  
+
 auto C = CurLSI->getCXXThisCapture();
 
 if (C.isCopyCapture()) {
@@ -926,7 +926,7 @@ static QualType adjustCVQualifiersForCXX
 assert(CurLSI);
 assert(isGenericLambdaCallOperatorSpecialization(CurLSI->CallOperator));
 assert(CurDC == getLambdaAwareParentOfDeclContext(CurLSI->CallOperator));
-
+
 auto IsThisCaptured =
 [](CXXRecordDecl *Closure, bool , bool ) {
   IsConst = false;
@@ -996,10 +996,10 @@ QualType Sema::getCurrentThisType() {
   return ThisTy;
 }
 
-Sema::CXXThisScopeRAII::CXXThisScopeRAII(Sema , 
+Sema::CXXThisScopeRAII::CXXThisScopeRAII(Sema ,
  Decl *ContextDecl,
  unsigned CXXThisTypeQuals,
- bool Enabled) 
+ bool Enabled)
   : S(S), OldCXXThisTypeOverride(S.CXXThisTypeOverride), Enabled(false)
 {
   if (!Enabled || !ContextDecl)
@@ -1010,13 +1010,13 @@ Sema::CXXThisScopeRAII::CXXThisScopeRAII
 Record = Template->getTemplatedDecl();
   else
 Record = cast(ContextDecl);
-
+
   // We care only for CVR qualifiers here, so cut everything else.
   CXXThisTypeQuals &= Qualifiers::FastMask;
   S.CXXThisTypeOverride
 = S.Context.getPointerType(
 S.Context.getRecordType(Record).withCVRQualifiers(CXXThisTypeQuals));
-  
+
   this->Enabled = true;
 }
 
@@ -1030,7 +1030,7 @@ Sema::CXXThisScopeRAII::~CXXThisScopeRAI
 static Expr *captureThis(Sema , ASTContext , RecordDecl *RD,
  QualType ThisTy, SourceLocation Loc,
  const bool ByCopy) {
- 
+
   QualType AdjustedThisTy = ThisTy;
   // The type of the corresponding data member (not a 'this' pointer if 'by
   // copy').
@@ -1043,7 +1043,7 @@ static Expr *captureThis(Sema , ASTCon
 CaptureThisFieldTy.removeLocalCVRQualifiers(Qualifiers::CVRMask);
 AdjustedThisTy = Context.getPointerType(CaptureThisFieldTy);
   }
-  
+
   FieldDecl *Field = FieldDecl::Create(
   Context, RD, Loc, Loc, nullptr, CaptureThisFieldTy,
   Context.getTrivialTypeSourceInfo(CaptureThisFieldTy, Loc), 

r282857 - Fix int <= bool comparison warning on MSVC

2016-09-30 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri Sep 30 09:18:06 2016
New Revision: 282857

URL: http://llvm.org/viewvc/llvm-project?rev=282857&view=rev
Log:
Fix int <= bool comparison warning on MSVC

Modified:
cfe/trunk/lib/Sema/SemaExprCXX.cpp

Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=282857&r1=282856&r2=282857&view=diff
==
--- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Fri Sep 30 09:18:06 2016
@@ -2366,11 +2366,14 @@ void Sema::DeclareGlobalNewDelete() {
 bool HasSizedVariant = getLangOpts().SizedDeallocation &&
(Kind == OO_Delete || Kind == OO_Array_Delete);
 bool HasAlignedVariant = getLangOpts().CPlusPlus1z;
-for (int Sized = 0; Sized <= HasSizedVariant; ++Sized) {
+
+int NumSizeVariants = (HasSizedVariant ? 2 : 1);
+int NumAlignVariants = (HasAlignedVariant ? 2 : 1);
+for (int Sized = 0; Sized < NumSizeVariants; ++Sized) {
   if (Sized)
 Params.push_back(SizeT);
 
-  for (int Aligned = 0; Aligned <= HasAlignedVariant; ++Aligned) {
+  for (int Aligned = 0; Aligned < NumAlignVariants; ++Aligned) {
 if (Aligned)
   Params.push_back(Context.getTypeDeclType(getStdAlignValT()));
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D21021: [Clang][AVX512][BuiltIn]Adding intrinsics move_{sd|ss} to clang

2016-09-16 Thread Simon Pilgrim via cfe-commits
RKSimon added a subscriber: RKSimon.


Comment at: lib/Headers/avx512fintrin.h:9124
@@ +9123,3 @@
+{
+  return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B,
+   (__v4sf) __W,

delena wrote:
> please try the following:
> if (__U)
>   return __builtin_shuffle(A, B, (0, 5, 6, 7)); // may be you need to swap A 
> and B 
>  return W;
> 
> I know that the immediate code will be less optimal, but we can optimize it 
> later.
Any update on this? I currently have a patch (D24653) looking at removing the 
movss/movsd mask intrinsics as we should be able to do this with purely generic 
shuffles. I can help with the optimization if necessary.


https://reviews.llvm.org/D21021



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r280921 - Moved unreachable to appease msvc, gcc and clang

2016-09-08 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu Sep  8 06:03:41 2016
New Revision: 280921

URL: http://llvm.org/viewvc/llvm-project?rev=280921=rev
Log:
Moved unreachable to appease msvc, gcc and clang

Modified:
cfe/trunk/lib/CodeGen/CGVTables.cpp

Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTables.cpp?rev=280921=280920=280921=diff
==
--- cfe/trunk/lib/CodeGen/CGVTables.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGVTables.cpp Thu Sep  8 06:03:41 2016
@@ -529,9 +529,6 @@ llvm::Constant *CodeGenVTables::CreateVT
   };
 
   switch (Component.getKind()) {
-  default:
-llvm_unreachable("Unexpected vtable component kind");
-
   case VTableComponent::CK_VCallOffset:
 return OffsetConstant(Component.getVCallOffset());
 
@@ -619,6 +616,8 @@ llvm::Constant *CodeGenVTables::CreateVT
   case VTableComponent::CK_UnusedFunctionPointer:
 return llvm::ConstantExpr::getNullValue(CGM.Int8PtrTy);
   }
+
+  llvm_unreachable("Unexpected vtable component kind");
 }
 
 llvm::Constant *


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r280917 - Fixed a 'not all control paths return a value' warning on MSVC builds

2016-09-08 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu Sep  8 04:59:58 2016
New Revision: 280917

URL: http://llvm.org/viewvc/llvm-project?rev=280917=rev
Log:
Fixed a 'not all control paths return a value' warning on MSVC builds

Modified:
cfe/trunk/lib/CodeGen/CGVTables.cpp

Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTables.cpp?rev=280917=280916=280917=diff
==
--- cfe/trunk/lib/CodeGen/CGVTables.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGVTables.cpp Thu Sep  8 04:59:58 2016
@@ -29,7 +29,7 @@ using namespace CodeGen;
 CodeGenVTables::CodeGenVTables(CodeGenModule )
 : CGM(CGM), VTContext(CGM.getContext().getVTableContext()) {}
 
-llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD, 
+llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD,
   const ThunkInfo ) {
   const CXXMethodDecl *MD = cast(GD.getDecl());
 
@@ -93,7 +93,7 @@ static RValue PerformReturnAdjustment(Co
 AdjustNull = CGF.createBasicBlock("adjust.null");
 AdjustNotNull = CGF.createBasicBlock("adjust.notnull");
 AdjustEnd = CGF.createBasicBlock("adjust.end");
-  
+
 llvm::Value *IsNull = CGF.Builder.CreateIsNull(ReturnValue);
 CGF.Builder.CreateCondBr(IsNull, AdjustNull, AdjustNotNull);
 CGF.EmitBlock(AdjustNotNull);
@@ -110,14 +110,14 @@ static RValue PerformReturnAdjustment(Co
 CGF.EmitBlock(AdjustNull);
 CGF.Builder.CreateBr(AdjustEnd);
 CGF.EmitBlock(AdjustEnd);
-  
+
 llvm::PHINode *PHI = CGF.Builder.CreatePHI(ReturnValue->getType(), 2);
 PHI->addIncoming(ReturnValue, AdjustNotNull);
-PHI->addIncoming(llvm::Constant::getNullValue(ReturnValue->getType()), 
+PHI->addIncoming(llvm::Constant::getNullValue(ReturnValue->getType()),
  AdjustNull);
 ReturnValue = PHI;
   }
-  
+
   return RValue::get(ReturnValue);
 }
 
@@ -314,7 +314,7 @@ void CodeGenFunction::EmitCallAndReturnF
   CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect &&
   !hasScalarEvaluationKind(CurFnInfo->getReturnType()))
 Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified());
-  
+
   // Now emit our call.
   llvm::Instruction *CallOrInvoke;
   RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, MD, );
@@ -433,14 +433,14 @@ void CodeGenVTables::emitThunk(GlobalDec
 // Remove the name from the old thunk function and get a new thunk.
 OldThunkFn->setName(StringRef());
 Entry = cast(CGM.GetAddrOfThunk(GD, Thunk));
-
+
 // If needed, replace the old thunk with a bitcast.
 if (!OldThunkFn->use_empty()) {
   llvm::Constant *NewPtrForOldDecl =
 llvm::ConstantExpr::getBitCast(Entry, OldThunkFn->getType());
   OldThunkFn->replaceAllUsesWith(NewPtrForOldDecl);
 }
-
+
 // Remove the old thunk.
 OldThunkFn->eraseFromParent();
   }
@@ -500,7 +500,7 @@ void CodeGenVTables::maybeEmitThunkForVT
 
 void CodeGenVTables::EmitThunks(GlobalDecl GD)
 {
-  const CXXMethodDecl *MD = 
+  const CXXMethodDecl *MD =
 cast(GD.getDecl())->getCanonicalDecl();
 
   // We don't need to generate thunks for the base destructor.
@@ -529,6 +529,9 @@ llvm::Constant *CodeGenVTables::CreateVT
   };
 
   switch (Component.getKind()) {
+  default:
+llvm_unreachable("Unexpected vtable component kind");
+
   case VTableComponent::CK_VCallOffset:
 return OffsetConstant(Component.getVCallOffset());
 
@@ -636,9 +639,9 @@ CodeGenVTables::CreateVTableInitializer(
 }
 
 llvm::GlobalVariable *
-CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, 
-  const BaseSubobject , 
-  bool BaseIsVirtual, 
+CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
+  const BaseSubobject ,
+  bool BaseIsVirtual,
llvm::GlobalVariable::LinkageTypes Linkage,
   VTableAddressPointsMapTy& AddressPoints) 
{
   if (CGDebugInfo *DI = CGM.getModuleDebugInfo())
@@ -671,7 +674,7 @@ CodeGenVTables::GenerateConstructionVTab
 Linkage = llvm::GlobalVariable::InternalLinkage;
 
   // Create the variable that will hold the construction vtable.
-  llvm::GlobalVariable *VTable = 
+  llvm::GlobalVariable *VTable =
 CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType, Linkage);
   CGM.setGlobalVisibility(VTable, RD);
 
@@ -684,7 +687,7 @@ CodeGenVTables::GenerateConstructionVTab
   // Create and set the initializer.
   llvm::Constant *Init = CreateVTableInitializer(*VTLayout, RTTI);
   VTable->setInitializer(Init);
-  
+
   CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get());
 
   return VTable;
@@ -699,7 +702,7 @@ static bool shouldEmitAvailableExternall
 /// Compute the required linkage of the vtable for the given class.
 ///
 /// Note that we 

r279382 - Wdocumentation fix

2016-08-20 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Sat Aug 20 15:21:27 2016
New Revision: 279382

URL: http://llvm.org/viewvc/llvm-project?rev=279382=rev
Log:
Wdocumentation fix

Modified:
cfe/trunk/lib/Analysis/CloneDetection.cpp

Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=279382=279381=279382=diff
==
--- cfe/trunk/lib/Analysis/CloneDetection.cpp (original)
+++ cfe/trunk/lib/Analysis/CloneDetection.cpp Sat Aug 20 15:21:27 2016
@@ -295,7 +295,7 @@ public:
   /// \brief Collects data of the given Stmt.
   /// \param S The given statement.
   /// \param Context The ASTContext of S.
-  /// \param D The data sink to which all data is forwarded.
+  /// \param DataConsumer The data sink to which all data is forwarded.
   StmtDataCollector(const Stmt *S, ASTContext , T )
   : Context(Context), DataConsumer(DataConsumer) {
 this->Visit(S);
@@ -695,7 +695,7 @@ static bool areSequencesClones(const Stm
 /// \param Group A group of presumed clones. The clones are allowed to have a
 ///  different variable pattern and may not be actual clones of 
each
 ///  other.
-/// \param CheckVariablePatterns If true, every clone in a group that was added
+/// \param CheckVariablePattern If true, every clone in a group that was added
 ///  to the output follows the same variable pattern as the other
 ///  clones in its group.
 static void createCloneGroups(std::vector ,


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r278503 - Fix Wdocumentation unknown parameter warning

2016-08-12 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri Aug 12 06:43:57 2016
New Revision: 278503

URL: http://llvm.org/viewvc/llvm-project?rev=278503=rev
Log:
Fix Wdocumentation unknown parameter warning

Modified:
cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp

Modified: cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp?rev=278503=278502=278503=diff
==
--- cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp (original)
+++ cfe/trunk/lib/Sema/SemaTemplateDeduction.cpp Fri Aug 12 06:43:57 2016
@@ -863,12 +863,12 @@ static bool hasInconsistentOrSupersetQua
 
   if (ParamQs == ArgQs)
 return false;
-   
+
   // Mismatched (but not missing) Objective-C GC attributes.
-  if (ParamQs.getObjCGCAttr() != ArgQs.getObjCGCAttr() && 
+  if (ParamQs.getObjCGCAttr() != ArgQs.getObjCGCAttr() &&
   ParamQs.hasObjCGCAttr())
 return true;
-  
+
   // Mismatched (but not missing) address spaces.
   if (ParamQs.getAddressSpace() != ArgQs.getAddressSpace() &&
   ParamQs.hasAddressSpace())
@@ -878,7 +878,7 @@ static bool hasInconsistentOrSupersetQua
   if (ParamQs.getObjCLifetime() != ArgQs.getObjCLifetime() &&
   ParamQs.hasObjCLifetime())
 return true;
-  
+
   // CVR qualifier superset.
   return (ParamQs.getCVRQualifiers() != ArgQs.getCVRQualifiers()) &&
   ((ParamQs.getCVRQualifiers() | ArgQs.getCVRQualifiers())
@@ -1060,7 +1060,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema
 // Just skip any attempts to deduce from a placeholder type.
 if (Arg->isPlaceholderType())
   return Sema::TDK_Success;
-
+
 unsigned Index = TemplateTypeParm->getIndex();
 bool RecanonicalizeArg = false;
 
@@ -1100,7 +1100,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema
   DeducedQs.removeAddressSpace();
 if (ParamQs.hasObjCLifetime())
   DeducedQs.removeObjCLifetime();
-
+
 // Objective-C ARC:
 //   If template deduction would produce a lifetime qualifier on a type
 //   that is not a lifetime type, template argument deduction fails.
@@ -1109,9 +1109,9 @@ DeduceTemplateArgumentsByTypeMatch(Sema
   Info.Param = cast(TemplateParams->getParam(Index));
   Info.FirstArg = TemplateArgument(Param);
   Info.SecondArg = TemplateArgument(Arg);
-  return Sema::TDK_Underqualified;  
+  return Sema::TDK_Underqualified;
 }
-
+
 // Objective-C ARC:
 //   If template deduction would produce an argument type with lifetime 
type
 //   but no lifetime qualifier, the __strong lifetime qualifier is 
inferred.
@@ -1119,10 +1119,10 @@ DeduceTemplateArgumentsByTypeMatch(Sema
 DeducedType->isObjCLifetimeType() &&
 !DeducedQs.hasObjCLifetime())
   DeducedQs.setObjCLifetime(Qualifiers::OCL_Strong);
-
+
 DeducedType = S.Context.getQualifiedType(DeducedType.getUnqualifiedType(),
  DeducedQs);
-
+
 if (RecanonicalizeArg)
   DeducedType = S.Context.getCanonicalType(DeducedType);
 
@@ -1163,7 +1163,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema
   if (Param.getCVRQualifiers() != Arg.getCVRQualifiers())
 return Sema::TDK_NonDeducedMismatch;
 }
-
+
 // If the parameter type is not dependent, there is nothing to deduce.
 if (!Param->isDependentType()) {
   if (!(TDF & TDF_SkipNonDependent)) {
@@ -1193,7 +1193,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema
   case Type::Class: llvm_unreachable("deducing non-canonical type: " #Class);
 #define TYPE(Class, Base)
 #include "clang/AST/TypeNodes.def"
-  
+
 case Type::TemplateTypeParm:
 case Type::SubstTemplateTypeParmPack:
   llvm_unreachable("Type nodes handled above");
@@ -1211,20 +1211,20 @@ DeduceTemplateArgumentsByTypeMatch(Sema
 case Type::ObjCObjectPointer: {
   if (TDF & TDF_SkipNonDependent)
 return Sema::TDK_Success;
-  
+
   if (TDF & TDF_IgnoreQualifiers) {
 Param = Param.getUnqualifiedType();
 Arg = Arg.getUnqualifiedType();
   }
-
+
   return Param == Arg? Sema::TDK_Success : Sema::TDK_NonDeducedMismatch;
 }
-  
-// _Complex T   [placeholder extension]  
+
+// _Complex T   [placeholder extension]
 case Type::Complex:
   if (const ComplexType *ComplexArg = Arg->getAs())
-return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams, 
-
cast(Param)->getElementType(), 
+return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
+cast(Param)->getElementType(),
 ComplexArg->getElementType(),
 Info, Deduced, TDF);
 
@@ -1549,7 +1549,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema
   return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,

r278208 - [X86][AVX] Ensure we only match against 1-byte alignment

2016-08-10 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed Aug 10 04:59:49 2016
New Revision: 278208

URL: http://llvm.org/viewvc/llvm-project?rev=278208=rev
Log:
[X86][AVX] Ensure we only match against 1-byte alignment

Modified:
cfe/trunk/test/CodeGen/avx-builtins.c

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=278208=278207=278208=diff
==
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Wed Aug 10 04:59:49 2016
@@ -84,14 +84,14 @@ __m256 test_mm256_blendv_ps(__m256 V1, _
 
 __m256d test_mm256_broadcast_pd(__m128d* A) {
   // CHECK-LABEL: test_mm256_broadcast_pd
-  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1
+  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x 
i32> 
   return _mm256_broadcast_pd(A);
 }
 
 __m256 test_mm256_broadcast_ps(__m128* A) {
   // CHECK-LABEL: test_mm256_broadcast_ps
-  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1
+  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> 

   return _mm256_broadcast_ps(A);
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] r278111 - Fix Wdocumentation unknown parameter warning

2016-08-09 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Tue Aug  9 05:02:11 2016
New Revision: 278111

URL: http://llvm.org/viewvc/llvm-project?rev=278111=rev
Log:
Fix Wdocumentation unknown parameter warning

Modified:
clang-tools-extra/trunk/include-fixer/IncludeFixer.h

Modified: clang-tools-extra/trunk/include-fixer/IncludeFixer.h
URL: 
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/include-fixer/IncludeFixer.h?rev=278111=278110=278111=diff
==
--- clang-tools-extra/trunk/include-fixer/IncludeFixer.h (original)
+++ clang-tools-extra/trunk/include-fixer/IncludeFixer.h Tue Aug  9 05:02:11 
2016
@@ -30,7 +30,7 @@ namespace include_fixer {
 class IncludeFixerActionFactory : public clang::tooling::ToolAction {
 public:
   /// \param SymbolIndexMgr A source for matching symbols to header files.
-  /// \param Context A context for the symbol being queried.
+  /// \param Contexts The contexts for the symbols being queried.
   /// \param StyleName Fallback style for reformatting.
   /// \param MinimizeIncludePaths whether inserted include paths are optimized.
   IncludeFixerActionFactory(SymbolIndexManager ,


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r276889 - Fix unnecessary default switch warning

2016-07-27 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed Jul 27 11:41:56 2016
New Revision: 276889

URL: http://llvm.org/viewvc/llvm-project?rev=276889=rev
Log:
Fix unnecessary default switch warning

Modified:
cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp

Modified: cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp?rev=276889=276888=276889=diff
==
--- cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp (original)
+++ cfe/trunk/lib/Sema/CodeCompleteConsumer.cpp Wed Jul 27 11:41:56 2016
@@ -445,8 +445,8 @@ bool PrintingCodeCompleteConsumer::isRes
   case CodeCompletionResult::RK_Pattern: {
 return !StringRef(Result.Pattern->getAsString()).startswith(Filter);
   }
-  default: llvm_unreachable("Unknown code completion result Kind.");
   }
+  llvm_unreachable("Unknown code completion result Kind.");
 }
 
 void 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r276417 - [X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 with generic IR

2016-07-22 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri Jul 22 08:58:56 2016
New Revision: 276417

URL: http://llvm.org/viewvc/llvm-project?rev=276417=rev
Log:
[X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 with 
generic IR

As discussed on D22460, I've updated the vbroadcastf128 pd256/ps256 builtins to 
map directly to generic IR - load+splat a 128-bit vector to both lanes of a 
256-bit vector.

Fix for PR28657.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=276417=276416=276417=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jul 22 08:58:56 2016
@@ -6619,6 +6619,26 @@ static Value *EmitX86MaskedLoad(CodeGenF
   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
 }
 
+static Value *EmitX86SubVectorBroadcast(CodeGenFunction ,
+SmallVectorImpl ,
+llvm::Type *DstTy,
+unsigned SrcSizeInBits,
+unsigned Align) {
+  // Load the subvector.
+  Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
+
+  // Create broadcast mask.
+  unsigned NumDstElts = DstTy->getVectorNumElements();
+  unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
+
+  SmallVector Mask;
+  for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
+for (unsigned j = 0; j != NumSrcElts; ++j)
+  Mask.push_back(j);
+
+  return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
+}
+
 static Value *EmitX86Select(CodeGenFunction ,
 Value *Mask, Value *Op0, Value *Op1) {
 
@@ -6995,6 +7015,13 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
 return EmitX86MaskedLoad(*this, Ops, Align);
   }
+
+  case X86::BI__builtin_ia32_vbroadcastf128_pd256:
+  case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
+llvm::Type *DstTy = ConvertType(E->getType());
+return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 16);
+  }
+
   case X86::BI__builtin_ia32_storehps:
   case X86::BI__builtin_ia32_storelps: {
 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=276417=276416=276417=diff
==
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Fri Jul 22 08:58:56 2016
@@ -84,13 +84,15 @@ __m256 test_mm256_blendv_ps(__m256 V1, _
 
 __m256d test_mm256_broadcast_pd(__m128d* A) {
   // CHECK-LABEL: test_mm256_broadcast_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %{{.*}})
+  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x 
i32> 
   return _mm256_broadcast_pd(A);
 }
 
 __m256 test_mm256_broadcast_ps(__m128* A) {
   // CHECK-LABEL: test_mm256_broadcast_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %{{.*}})
+  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> 

   return _mm256_broadcast_ps(A);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR

2016-07-20 Thread Simon Pilgrim via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL276102: [X86][SSE] Reimplement SSE fp2si conversion 
intrinsics instead of using… (authored by RKSimon).

Changed prior to commit:
  https://reviews.llvm.org/D22105?vs=64534&id=64653#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D22105

Files:
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/lib/Headers/avxintrin.h
  cfe/trunk/lib/Headers/emmintrin.h
  cfe/trunk/lib/Headers/xmmintrin.h
  cfe/trunk/test/CodeGen/avx-builtins.c
  cfe/trunk/test/CodeGen/builtins-x86.c
  cfe/trunk/test/CodeGen/sse-builtins.c
  cfe/trunk/test/CodeGen/sse2-builtins.c

Index: cfe/trunk/lib/Headers/xmmintrin.h
===
--- cfe/trunk/lib/Headers/xmmintrin.h
+++ cfe/trunk/lib/Headers/xmmintrin.h
@@ -1350,7 +1350,7 @@
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvttss_si32(__m128 __a)
 {
-  return __a[0];
+  return __builtin_ia32_cvttss2si((__v4sf)__a);
 }
 
 /// \brief Converts a float value contained in the lower 32 bits of a vector of
@@ -1386,7 +1386,7 @@
 static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_cvttss_si64(__m128 __a)
 {
-  return __a[0];
+  return __builtin_ia32_cvttss2si64((__v4sf)__a);
 }
 
 /// \brief Converts two low-order float values in a 128-bit vector of
Index: cfe/trunk/lib/Headers/avxintrin.h
===
--- cfe/trunk/lib/Headers/avxintrin.h
+++ cfe/trunk/lib/Headers/avxintrin.h
@@ -2117,7 +2117,7 @@
 static __inline __m128i __DEFAULT_FN_ATTRS
 _mm256_cvttpd_epi32(__m256d __a)
 {
-  return (__m128i)__builtin_convertvector((__v4df) __a, __v4si);
+  return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
 }
 
 static __inline __m128i __DEFAULT_FN_ATTRS
@@ -2129,7 +2129,7 @@
 static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_cvttps_epi32(__m256 __a)
 {
-  return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si);
+  return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
 }
 
 static __inline double __DEFAULT_FN_ATTRS
Index: cfe/trunk/lib/Headers/emmintrin.h
===
--- cfe/trunk/lib/Headers/emmintrin.h
+++ cfe/trunk/lib/Headers/emmintrin.h
@@ -417,8 +417,7 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtsd_ss(__m128 __a, __m128d __b)
 {
-  __a[0] = __b[0];
-  return __a;
+  return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -444,7 +443,7 @@
 static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvttsd_si32(__m128d __a)
 {
-  return __a[0];
+  return __builtin_ia32_cvttsd2si((__v2df)__a);
 }
 
 static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -1707,7 +1706,7 @@
 static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_cvttsd_si64(__m128d __a)
 {
-  return __a[0];
+  return __builtin_ia32_cvttsd2si64((__v2df)__a);
 }
 #endif
 
@@ -1755,7 +1754,7 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvttps_epi32(__m128 __a)
 {
-  return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si);
+  return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
 }
 
 /// \brief Returns a vector of [4 x i32] where the lowest element is the input
Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def
===
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def
@@ -303,7 +303,9 @@
 TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse")
@@ -328,8 +330,12 @@
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
@@ -455,7 +461,9 @@
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, 

r276102 - [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR

2016-07-20 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed Jul 20 05:18:01 2016
New Revision: 276102

URL: http://llvm.org/viewvc/llvm-project?rev=276102&view=rev
Log:
[X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic 
IR

D20859 and D20860 attempted to replace the SSE (V)CVTTPS2DQ and VCVTTPD2DQ 
truncating conversions with generic IR instead.

It turns out that the behaviour of these intrinsics is different enough from 
generic IR that this will cause problems, INF/NAN/out of range values are 
guaranteed to result in a 0x80000000 value - which plays havoc with constant 
folding which converts them to either zero or UNDEF. This is also an issue with 
the scalar implementations (which were already generic IR and what I was trying 
to match).

This patch changes both scalar and packed versions back to using x86-specific 
builtins.

It also deals with the other scalar conversion cases that are runtime rounding 
mode dependent and can have similar issues with constant folding.

Differential Revision: https://reviews.llvm.org/D22105

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/xmmintrin.h
cfe/trunk/test/CodeGen/avx-builtins.c
cfe/trunk/test/CodeGen/builtins-x86.c
cfe/trunk/test/CodeGen/sse-builtins.c
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=276102&r1=276101&r2=276102&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jul 20 05:18:01 2016
@@ -303,7 +303,9 @@ TARGET_BUILTIN(__builtin_ia32_pabsd128,
 TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse")
@@ -328,8 +330,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtpd2dq,
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
@@ -455,7 +461,9 @@ TARGET_BUILTIN(__builtin_ia32_cmpss, "V4
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx")

Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=276102&r1=276101&r2=276102&view=diff
==
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Wed Jul 20 05:18:01 2016
@@ -2117,7 +2117,7 @@ _mm256_cvtps_pd(__m128 __a)
 static __inline __m128i __DEFAULT_FN_ATTRS
 _mm256_cvttpd_epi32(__m256d __a)
 {
-  return (__m128i)__builtin_convertvector((__v4df) __a, __v4si);
+  return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
 }
 
 static __inline __m128i __DEFAULT_FN_ATTRS
@@ -2129,7 +2129,7 @@ _mm256_cvtpd_epi32(__m256d __a)
 static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_cvttps_epi32(__m256 __a)
 {
-  return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si);
+  return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
 }
 
 static __inline double __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=276102&r1=276101&r2=276102&view=diff

Re: [PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR

2016-07-19 Thread Simon Pilgrim via cfe-commits
RKSimon updated this revision to Diff 64534.
RKSimon added a comment.

Removed sitofp conversion changes


Repository:
  rL LLVM

https://reviews.llvm.org/D22105

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avxintrin.h
  lib/Headers/emmintrin.h
  lib/Headers/xmmintrin.h
  test/CodeGen/avx-builtins.c
  test/CodeGen/builtins-x86.c
  test/CodeGen/sse-builtins.c
  test/CodeGen/sse2-builtins.c

Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -507,7 +507,7 @@
 
 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
   // CHECK-LABEL: test_mm_cvtsd_ss
-  // CHECK: fptrunc double %{{.*}} to float
+  // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
   return _mm_cvtsd_ss(A, B);
 }
 
@@ -569,21 +569,19 @@
 
 __m128i test_mm_cvttps_epi32(__m128 A) {
   // CHECK-LABEL: test_mm_cvttps_epi32
-  // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
+  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
   return _mm_cvttps_epi32(A);
 }
 
 int test_mm_cvttsd_si32(__m128d A) {
   // CHECK-LABEL: test_mm_cvttsd_si32
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: fptosi double %{{.*}} to i32
+  // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
   return _mm_cvttsd_si32(A);
 }
 
 long long test_mm_cvttsd_si64(__m128d A) {
   // CHECK-LABEL: test_mm_cvttsd_si64
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: fptosi double %{{.*}} to i64
+  // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
   return _mm_cvttsd_si64(A);
 }
 
Index: test/CodeGen/sse-builtins.c
===
--- test/CodeGen/sse-builtins.c
+++ test/CodeGen/sse-builtins.c
@@ -295,22 +295,19 @@
 
 int test_mm_cvtt_ss2si(__m128 A) {
   // CHECK-LABEL: test_mm_cvtt_ss2si
-  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  // CHECK: fptosi float %{{.*}} to i32
+  // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
   return _mm_cvtt_ss2si(A);
 }
 
 int test_mm_cvttss_si32(__m128 A) {
   // CHECK-LABEL: test_mm_cvttss_si32
-  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  // CHECK: fptosi float %{{.*}} to i32
+  // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
   return _mm_cvttss_si32(A);
 }
 
 long long test_mm_cvttss_si64(__m128 A) {
   // CHECK-LABEL: test_mm_cvttss_si64
-  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  // CHECK: fptosi float %{{.*}} to i64
+  // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}})
   return _mm_cvttss_si64(A);
 }
 
Index: test/CodeGen/builtins-x86.c
===
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -287,12 +287,14 @@
   tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
   tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
   tmp_i = __builtin_ia32_cvtss2si(tmp_V4f);
+  tmp_i = __builtin_ia32_cvttss2si(tmp_V4f);
 
   tmp_i = __builtin_ia32_rdtsc();
  tmp_i = __builtin_ia32_rdtscp(&tmp_Ui);
   tmp_LLi = __builtin_ia32_rdpmc(tmp_i);
 #ifdef USE_64
   tmp_LLi = __builtin_ia32_cvtss2si64(tmp_V4f);
+  tmp_LLi = __builtin_ia32_cvttss2si64(tmp_V4f);
 #endif
   tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
   (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
@@ -328,10 +330,14 @@
   tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d);
   tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i);
   tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d);
+  tmp_i = __builtin_ia32_cvttsd2si(tmp_V2d);
+  tmp_V4f = __builtin_ia32_cvtsd2ss(tmp_V4f, tmp_V2d);
 #ifdef USE_64
   tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
+  tmp_LLi = __builtin_ia32_cvttsd2si64(tmp_V2d);
 #endif
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
+  tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
   (void) __builtin_ia32_lfence();
   (void) __builtin_ia32_mfence();
@@ -410,7 +416,9 @@
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
+  tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
   tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
+  tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
   tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
   tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
Index: test/CodeGen/avx-builtins.c
===
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -286,13 +286,13 @@
 
 __m128i test_mm256_cvttpd_epi32(__m256d A) {
   // CHECK-LABEL: test_mm256_cvttpd_epi32
-  // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32>
+  // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}})
   return 

Re: [PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR

2016-07-19 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In https://reviews.llvm.org/D22105#488566, @eli.friedman wrote:

> The x86-specific operation is affected by the rounding mode... but so is a C 
> cast.  This is specified by Annex F in the C standard.
>
> Of course, you're going to end up with undefined behavior if you actually 
> modify the rounding mode because LLVM and clang don't support FENV_ACCESS at 
> the moment.


OK I'm going to pull the sitofp conversions from this patch - I have other 
concerns about them (i.e. we don't treat scalar + vector the same) that will 
need to be looked at as well.


Repository:
  rL LLVM

https://reviews.llvm.org/D22105



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR

2016-07-19 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In https://reviews.llvm.org/D22105#488513, @eli.friedman wrote:

> I don't think we need to use x86-specific operations for sitofp-like 
> conversions; the C cast is equivalent given that a 32 or 64-bit integer is 
> always in within the range of a 32-bit float.


I think the only situation that lossless conversion occurs is i32->f64, every 
other sitofp conversion could be affected by the rounding control no?


Repository:
  rL LLVM

https://reviews.llvm.org/D22105



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r274799 - Update switch statement to match coding standards.

2016-07-07 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu Jul  7 17:32:26 2016
New Revision: 274799

URL: http://llvm.org/viewvc/llvm-project?rev=274799&view=rev
Log:
Update switch statement to match coding standards.

Modified:
cfe/trunk/lib/Basic/Targets.cpp

Modified: cfe/trunk/lib/Basic/Targets.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=274799&r1=274798&r2=274799&view=diff
==
--- cfe/trunk/lib/Basic/Targets.cpp (original)
+++ cfe/trunk/lib/Basic/Targets.cpp Thu Jul  7 17:32:26 2016
@@ -1777,7 +1777,7 @@ public:
   // Set __CUDA_ARCH__ for the GPU specified.
   std::string CUDAArchCode = [this] {
 switch (GPU) {
-default:
+case CudaArch::UNKNOWN:
   assert(false && "No GPU arch when compiling CUDA device code.");
   return "";
 case CudaArch::SM_20:
@@ -1805,6 +1805,7 @@ public:
 case CudaArch::SM_62:
   return "620";
 }
+llvm_unreachable("unhandled CudaArch");
   }();
   Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D22105: [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR

2016-07-07 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: eli.friedman, mkuper, craig.topper, spatel, andreadb.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

D20859 and D20860 attempted to replace the SSE (V)CVTTPS2DQ and VCVTTPD2DQ 
truncating conversions with generic IR instead. 

It turns out that the behaviour of these intrinsics is different enough from 
generic IR that this will cause problems, INF/NAN/out of range values are 
guaranteed to result in a 0x80000000 value - which plays havoc with constant 
folding which converts them to either zero or UNDEF. This is also an issue with 
the scalar implementations (which were already generic IR and what I was trying 
to match).

This patch changes both scalar and packed versions back to using x86-specific 
builtins.

It also deals with the other scalar conversion cases that are runtime rounding 
mode dependent and can have similar issues with constant folding.

A companion llvm patch will be submitted shortly.

Repository:
  rL LLVM

http://reviews.llvm.org/D22105

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avxintrin.h
  lib/Headers/emmintrin.h
  lib/Headers/xmmintrin.h
  test/CodeGen/avx-builtins.c
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/builtins-x86.c
  test/CodeGen/sse-builtins.c
  test/CodeGen/sse2-builtins.c

Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -507,7 +507,7 @@
 
 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
   // CHECK-LABEL: test_mm_cvtsd_ss
-  // CHECK: fptrunc double %{{.*}} to float
+  // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
   return _mm_cvtsd_ss(A, B);
 }
 
@@ -541,8 +541,7 @@
 
 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
   // CHECK-LABEL: test_mm_cvtsi64_sd
-  // CHECK: sitofp i64 %{{.*}} to double
-  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+  // CHECK: call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %{{.*}}, i64 %{{.*}})
   return _mm_cvtsi64_sd(A, B);
 }
 
@@ -569,21 +568,19 @@
 
 __m128i test_mm_cvttps_epi32(__m128 A) {
   // CHECK-LABEL: test_mm_cvttps_epi32
-  // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
+  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
   return _mm_cvttps_epi32(A);
 }
 
 int test_mm_cvttsd_si32(__m128d A) {
   // CHECK-LABEL: test_mm_cvttsd_si32
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: fptosi double %{{.*}} to i32
+  // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
   return _mm_cvttsd_si32(A);
 }
 
 long long test_mm_cvttsd_si64(__m128d A) {
   // CHECK-LABEL: test_mm_cvttsd_si64
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: fptosi double %{{.*}} to i64
+  // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
   return _mm_cvttsd_si64(A);
 }
 
Index: test/CodeGen/sse-builtins.c
===
--- test/CodeGen/sse-builtins.c
+++ test/CodeGen/sse-builtins.c
@@ -263,15 +263,13 @@
 
 __m128 test_mm_cvtsi32_ss(__m128 A, int B) {
   // CHECK-LABEL: test_mm_cvtsi32_ss
-  // CHECK: sitofp i32 %{{.*}} to float
-  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+  // CHECK: call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %{{.*}}, i32 %{{.*}})
   return _mm_cvtsi32_ss(A, B);
 }
 
 __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
   // CHECK-LABEL: test_mm_cvtsi64_ss
-  // CHECK: sitofp i64 %{{.*}} to float
-  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+  // CHECK: call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %{{.*}}, i64 %{{.*}})
   return _mm_cvtsi64_ss(A, B);
 }
 
@@ -295,22 +293,19 @@
 
 int test_mm_cvtt_ss2si(__m128 A) {
   // CHECK-LABEL: test_mm_cvtt_ss2si
-  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  // CHECK: fptosi float %{{.*}} to i32
+  // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
   return _mm_cvtt_ss2si(A);
 }
 
 int test_mm_cvttss_si32(__m128 A) {
   // CHECK-LABEL: test_mm_cvttss_si32
-  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  // CHECK: fptosi float %{{.*}} to i32
+  // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
   return _mm_cvttss_si32(A);
 }
 
 long long test_mm_cvttss_si64(__m128 A) {
   // CHECK-LABEL: test_mm_cvttss_si64
-  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  // CHECK: fptosi float %{{.*}} to i64
+  // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}})
   return _mm_cvttss_si64(A);
 }
 
Index: test/CodeGen/builtins-x86.c
===
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -286,13 +286,17 @@
 
   tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
   tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
+  tmp_V4f = 

r274748 - Fix "not all control paths return a value" warning on MSVC

2016-07-07 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu Jul  7 06:24:38 2016
New Revision: 274748

URL: http://llvm.org/viewvc/llvm-project?rev=274748&view=rev
Log:
Fix "not all control paths return a value" warning on MSVC

This time without causing a 'all enums handled' warning on other compilers.

Modified:
cfe/trunk/lib/Basic/Targets.cpp

Modified: cfe/trunk/lib/Basic/Targets.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=274748&r1=274747&r2=274748&view=diff
==
--- cfe/trunk/lib/Basic/Targets.cpp (original)
+++ cfe/trunk/lib/Basic/Targets.cpp Thu Jul  7 06:24:38 2016
@@ -1777,7 +1777,7 @@ public:
   // Set __CUDA_ARCH__ for the GPU specified.
   std::string CUDAArchCode = [this] {
 switch (GPU) {
-case CudaArch::UNKNOWN:
+default:
   assert(false && "No GPU arch when compiling CUDA device code.");
   return "";
 case CudaArch::SM_20:
@@ -1804,8 +1804,6 @@ public:
   return "610";
 case CudaArch::SM_62:
   return "620";
- default:
-   llvm_unreachable("unhandled CudaArch");
 }
   }();
   Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r274746 - Fix "not all control paths return a value" warning on MSVC

2016-07-07 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu Jul  7 06:12:02 2016
New Revision: 274746

URL: http://llvm.org/viewvc/llvm-project?rev=274746&view=rev
Log:
Fix "not all control paths return a value" warning on MSVC

Modified:
cfe/trunk/lib/Basic/Targets.cpp

Modified: cfe/trunk/lib/Basic/Targets.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=274746&r1=274745&r2=274746&view=diff
==
--- cfe/trunk/lib/Basic/Targets.cpp (original)
+++ cfe/trunk/lib/Basic/Targets.cpp Thu Jul  7 06:12:02 2016
@@ -1804,6 +1804,8 @@ public:
   return "610";
 case CudaArch::SM_62:
   return "620";
+ default:
+   llvm_unreachable("unhandled CudaArch");
 }
   }();
   Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r274554 - [X86][AVX512] Remove vector BROADCAST builtins.

2016-07-05 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Tue Jul  5 09:49:31 2016
New Revision: 274554

URL: http://llvm.org/viewvc/llvm-project?rev=274554&view=rev
Log:
[X86][AVX512] Remove vector BROADCAST builtins.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=274554&r1=274553&r2=274554&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Jul  5 09:49:31 2016
@@ -996,8 +996,6 @@ TARGET_BUILTIN(__builtin_ia32_pmuldq512_
 TARGET_BUILTIN(__builtin_ia32_pmuludq512_mask, "V8LLiV16iV16iV8LLiUc", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd512, "V16iV4iV16iUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq512, "V8LLiV2LLiV8LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "", 
"avx512f")
@@ -1908,8 +1906,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcastf
 TARGET_BUILTIN(__builtin_ia32_broadcastf64x4_512, "V8dV4dV8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_512, "V16iV4iV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_broadcasti64x4_512, 
"V8LLiV4LLiV8LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_broadcastsd512, "V8dV2dV8dUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_broadcastss512, "V16fV4fV16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_broadcastmb128, "V2LLiUc","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl")
@@ -1927,19 +1923,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcasti
 TARGET_BUILTIN(__builtin_ia32_broadcasti64x2_256_mask, 
"V4LLiV2LLiV4LLiUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastf32x4_256_mask, 
"V8fV4fV8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_256_mask, 
"V8iV4iV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastsd256_mask, "V4dV2dV4dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastss128_mask, "V4fV4fV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastss256_mask, "V8fV4fV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_mask, 
"V32sV8sV32sUi","","avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_mask, 
"V16cV16cV16cUs","","avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_mask, 
"V32cV16cV32cUi","","avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_mask, 
"V8sV8sV8sUc","","avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_mask, 
"V16sV8sV16sUs","","avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd128_mask, "V4iV4iV4iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd256_mask, "V8iV4iV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_mask, 
"V2LLiV2LLiV2LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_mask, 
"V4LLiV2LLiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_mask, 
"V64cV16cV64cULLi","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, 
"V32shV32sUi","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, 
"V16shV16sUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, 
"V8ssV8sUc","","avx512bw,avx512vl")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r274544 - [X86][AVX512] Converted the VBROADCAST intrinsics to generic IR

2016-07-05 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Tue Jul  5 07:59:33 2016
New Revision: 274544

URL: http://llvm.org/viewvc/llvm-project?rev=274544&view=rev
Log:
[X86][AVX512] Converted the VBROADCAST intrinsics to generic IR

Modified:
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlbwintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c
cfe/trunk/test/CodeGen/avx512vlbw-builtins.c

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=274544&r1=274543&r2=274544&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Tue Jul  5 07:59:33 2016
@@ -2266,25 +2266,28 @@ _mm512_movm_epi16 (__mmask32 __A)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_broadcastb_epi8 (__m128i __A)
 {
-  return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
-   (__v64qi) _mm512_setzero_si512(),
-   (__mmask64) -1);
+  return (__m512i)__builtin_shufflevector((__v16qi) __A,
+  (__v16qi)_mm_undefined_si128(),
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
-   (__v64qi) __O,
-   __M);
+  return (__m512i)__builtin_ia32_selectb_512(__M,
+ (__v64qi) 
_mm512_broadcastb_epi8(__A),
+ (__v64qi) __O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
-   (__v64qi) _mm512_setzero_qi(),
-   __M);
+  return (__m512i)__builtin_ia32_selectb_512(__M,
+ (__v64qi) 
_mm512_broadcastb_epi8(__A),
+ (__v64qi) _mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -2306,25 +2309,26 @@ _mm512_maskz_set1_epi16 (__mmask32 __M,
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_broadcastw_epi16 (__m128i __A)
 {
-  return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
-   (__v32hi) _mm512_setzero_si512(),
-   (__mmask32) -1);
+  return (__m512i)__builtin_shufflevector((__v8hi) __A,
+  (__v8hi)_mm_undefined_si128(),
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
-   (__v32hi) __O,
-   __M);
+  return (__m512i)__builtin_ia32_selectw_512(__M,
+ (__v32hi) 
_mm512_broadcastw_epi16(__A),
+ (__v32hi) __O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
-   (__v32hi) _mm512_setzero_hi(),
-   __M);
+  return (__m512i)__builtin_ia32_selectw_512(__M,
+ (__v32hi) 
_mm512_broadcastw_epi16(__A),
+ (__v32hi) _mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274544&r1=274543&r2=274544&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Jul  5 07:59:33 2016
@@ -195,54 +195,54 @@ _mm512_undefined_epi32(void)
 {
   return (__m512i)__builtin_ia32_undef512();
 }
+
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_broadcastd_epi32 (__m128i __A)
 {
-  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
-  (__v16si)
-  

r274523 - [X86][AVX512] Converted the VSHUFPD intrinsics to generic IR

2016-07-04 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon Jul  4 16:30:47 2016
New Revision: 274523

URL: http://llvm.org/viewvc/llvm-project?rev=274523&view=rev
Log:
[X86][AVX512] Converted the VSHUFPD intrinsics to generic IR

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274523&r1=274522&r2=274523&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Jul  4 16:30:47 2016
@@ -5950,6 +5950,7 @@ _mm512_kmov (__mmask16 __A)
 
 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
+
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
  __mmask16 __U, __m512i __B)
@@ -7166,23 +7167,27 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m12
   (__v8di)_mm512_setzero_si512(), \
   (__mmask8)(U)); })
 
-#define _mm512_shuffle_pd(M, V, imm) __extension__ ({ \
-  (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \
- (__v8df)(__m512d)(V), (int)(imm), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1); })
+#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
+  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
+   (__v8df)(__m512d)(B), \
+   (((M) & 0x01) >> 0) +  0, \
+   (((M) & 0x02) >> 1) +  8, \
+   (((M) & 0x04) >> 2) +  2, \
+   (((M) & 0x08) >> 3) + 10, \
+   (((M) & 0x10) >> 4) +  4, \
+   (((M) & 0x20) >> 5) + 12, \
+   (((M) & 0x40) >> 6) +  6, \
+   (((M) & 0x80) >> 7) + 14); })
 
-#define _mm512_mask_shuffle_pd(W, U, M, V, imm) __extension__ ({ \
-  (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \
- (__v8df)(__m512d)(V), (int)(imm), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U)); })
+#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+   (__v8df)_mm512_shuffle_pd((A), (B), 
(M)), \
+   (__v8df)(__m512d)(W)); })
 
-#define _mm512_maskz_shuffle_pd(U, M, V, imm) __extension__ ({ \
-  (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \
- (__v8df)(__m512d)(V), (int)(imm), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U)); })
+#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+   (__v8df)_mm512_shuffle_pd((A), (B), 
(M)), \
+   (__v8df)_mm512_setzero_pd()); })
 
 #define _mm512_shuffle_ps(M, V, imm) __extension__ ({ \
   (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \

Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=274523&r1=274522&r2=274523&view=diff
==
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Mon Jul  4 16:30:47 2016
@@ -7374,51 +7374,45 @@ _mm256_maskz_sra_epi64 (__mmask8 __U, __
   (__v4di)_mm256_setzero_si256(), \
   (__mmask8)(U)); })
 
-#define _mm_mask_shuffle_pd(W, U, A, B, imm) __extension__ ({ \
-  (__m128d)__builtin_ia32_shufpd128_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), (int)(imm), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U)); })
-
-#define _mm_maskz_shuffle_pd(U, A, B, imm) __extension__ ({ \
-  (__m128d)__builtin_ia32_shufpd128_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U)); })
-
-#define _mm256_mask_shuffle_pd(W, U, A, B, imm) __extension__ ({ \
-  

r274502 - [X86][AVX512] Converted the VPERMPD/VPERMQ intrinsics to generic IR

2016-07-04 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon Jul  4 08:34:44 2016
New Revision: 274502

URL: http://llvm.org/viewvc/llvm-project?rev=274502&view=rev
Log:
[X86][AVX512] Converted the VPERMPD/VPERMQ intrinsics to generic IR

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274502&r1=274501&r2=274502&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Jul  4 08:34:44 2016
@@ -8678,35 +8678,49 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128
   -(__v2df)(__m128d)(Y), \
   (__mmask8)(U), (int)(R)); })
 
-#define _mm512_permutex_pd(X, M) __extension__ ({ \
-  (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1); })
-
-#define _mm512_mask_permutex_pd(W, U, X, M) __extension__ ({ \
-  (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U)); })
-
-#define _mm512_maskz_permutex_pd(U, X, M) __extension__ ({ \
-  (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U)); })
-
-#define _mm512_permutex_epi64(X, I) __extension__ ({ \
-  (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
- (__v8di)_mm512_undefined_epi32(), \
- (__mmask8)-1); })
-
-#define _mm512_mask_permutex_epi64(W, M, X, I) __extension__ ({ \
-  (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(M)); })
-
-#define _mm512_maskz_permutex_epi64(M, X, I) __extension__ ({ \
-  (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(M)); })
+#define _mm512_permutex_pd(X, C) __extension__ ({ \
+  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
+   (__v8df)_mm512_undefined_pd(), \
+   0 + (((C) & 0x03) >> 0), \
+   0 + (((C) & 0x0c) >> 2), \
+   0 + (((C) & 0x30) >> 4), \
+   0 + (((C) & 0xc0) >> 6), \
+   4 + (((C) & 0x03) >> 0), \
+   4 + (((C) & 0x0c) >> 2), \
+   4 + (((C) & 0x30) >> 4), \
+   4 + (((C) & 0xc0) >> 6)); })
+
+#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+   (__v8df)_mm512_permutex_pd((X), (C)), \
+   (__v8df)(__m512d)(W)); })
+
+#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+   (__v8df)_mm512_permutex_pd((X), (C)), \
+   (__v8df)_mm512_setzero_pd()); })
+
+#define _mm512_permutex_epi64(X, C) __extension__ ({ \
+  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
+   (__v8di)_mm512_undefined_epi32(), \
+   0 + (((C) & 0x03) >> 0), \
+   0 + (((C) & 0x0c) >> 2), \
+   0 + (((C) & 0x30) >> 4), \
+   0 + (((C) & 0xc0) >> 6), \
+   4 + (((C) & 0x03) >> 0), \
+   4 + (((C) & 0x0c) >> 2), \
+   4 + (((C) & 0x30) >> 4), \
+   4 + (((C) & 0xc0) >> 6)); })
+
+#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
+  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+  (__v8di)_mm512_permutex_epi64((X), (C)), 
\
+  (__v8di)(__m512i)(W)); })
+
+#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
+  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+  (__v8di)_mm512_permutex_epi64((X), (C)), 
\
+ 

r274492 - [X86][AVX512] Converted the VPERMILPD/VPERMILPS intrinsics to generic IR

2016-07-04 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon Jul  4 06:06:15 2016
New Revision: 274492

URL: http://llvm.org/viewvc/llvm-project?rev=274492&view=rev
Log:
[X86][AVX512] Converted the VPERMILPD/VPERMILPS intrinsics to generic IR

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274492&r1=274491&r2=274492&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Jul  4 06:06:15 2016
@@ -6540,34 +6540,56 @@ _mm512_mask2_permutex2var_epi64 (__m512i
 }
 
 #define _mm512_permute_pd(X, C) __extension__ ({ \
-  (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \
-(__v8df)_mm512_undefined_pd(), \
-(__mmask8)-1); })
+  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
+   (__v8df)_mm512_setzero_pd(), \
+   0 + (((C) & 0x01) >> 0), \
+   0 + (((C) & 0x02) >> 1), \
+   2 + (((C) & 0x04) >> 2), \
+   2 + (((C) & 0x08) >> 3), \
+   4 + (((C) & 0x10) >> 4), \
+   4 + (((C) & 0x20) >> 5), \
+   6 + (((C) & 0x40) >> 6), \
+   6 + (((C) & 0x80) >> 7)); })
 
 #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
-  (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \
-(__v8df)(__m512d)(W), \
-(__mmask8)(U)); })
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+   (__v8df)_mm512_permute_pd((X), (C)), \
+   (__v8df)(__m512d)(W)); })
 
 #define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
-  (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \
-(__v8df)_mm512_setzero_pd(), \
-(__mmask8)(U)); })
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+   (__v8df)_mm512_permute_pd((X), (C)), \
+   (__v8df)_mm512_setzero_pd()); })
 
 #define _mm512_permute_ps(X, C) __extension__ ({ \
-  (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \
-   (__v16sf)_mm512_undefined_ps(), \
-   (__mmask16)-1); })
+  (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
+  (__v16sf)_mm512_setzero_ps(), \
+   0  + (((C) & 0x03) >> 0), \
+   0  + (((C) & 0x0c) >> 2), \
+   0  + (((C) & 0x30) >> 4), \
+   0  + (((C) & 0xc0) >> 6), \
+   4  + (((C) & 0x03) >> 0), \
+   4  + (((C) & 0x0c) >> 2), \
+   4  + (((C) & 0x30) >> 4), \
+   4  + (((C) & 0xc0) >> 6), \
+   8  + (((C) & 0x03) >> 0), \
+   8  + (((C) & 0x0c) >> 2), \
+   8  + (((C) & 0x30) >> 4), \
+   8  + (((C) & 0xc0) >> 6), \
+   12 + (((C) & 0x03) >> 0), \
+   12 + (((C) & 0x0c) >> 2), \
+   12 + (((C) & 0x30) >> 4), \
+   12 + (((C) & 0xc0) >> 6)); })
 
 #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
-  (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \
-   (__v16sf)(__m512)(W), \
-   (__mmask16)(U)); })
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+  (__v16sf)_mm512_permute_ps((X), (C)), \
+  (__v16sf)(__m512)(W)); })
 
 #define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
-  (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \
-   (__v16sf)_mm512_setzero_ps(), \
-   (__mmask16)(U)); })
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+  (__v16sf)_mm512_permute_ps((X), (C)), \
+  (__v16sf)_mm512_setzero_ps()); })
 
 

r274442 - [X86][AVX512] Converted the MOVDDUP/MOVSLDUP/MOVSHDUP masked intrinsics to generic IR

2016-07-02 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Sat Jul  2 12:16:25 2016
New Revision: 274442

URL: http://llvm.org/viewvc/llvm-project?rev=274442&view=rev
Log:
[X86][AVX512] Converted the MOVDDUP/MOVSLDUP/MOVSHDUP masked intrinsics to 
generic IR

llvm companion patch imminent

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=274442&r1=274441&r2=274442&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat Jul  2 12:16:25 2016
@@ -1668,9 +1668,6 @@ TARGET_BUILTIN(__builtin_ia32_movdqa64lo
 TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, 
"V4LLiV4LLiC*V4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, 
"vV2LLi*V2LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, 
"vV4LLi*V4LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movddup512_mask, "V8dV8dV8dUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movddup128_mask, "V2dV2dV2dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movddup256_mask, "V4dV4dV4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_gpr_mask, 
"V64ccV64cULLi","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_gpr_mask, 
"V16ccV16cUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_gpr_mask, 
"V32ccV32cUi","","avx512bw,avx512vl")
@@ -2122,12 +2119,6 @@ TARGET_BUILTIN(__builtin_ia32_compresssf
 TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, 
"V16iV16iV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movshdup512_mask, "V16fV16fV16fUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movsldup512_mask, "V16fV16fV16fUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movshdup128_mask, "V4fV4fV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movshdup256_mask, "V8fV8fV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movsldup128_mask, "V4fV4fV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movsldup256_mask, "V8fV8fV8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, 
"V8LLiV8LLiV8LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, 
"V8dV8dC*V8dUc","","avx512f")

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=274442&r1=274441&r2=274442&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sat Jul  2 12:16:25 2016
@@ -5572,32 +5572,27 @@ _mm512_mask_store_epi64 (void *__P, __mm
   (__mmask8) __U);
 }
 
-
-
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_movedup_pd (__m512d __A)
 {
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-   (__v8df)
-   _mm512_undefined_pd (),
-   (__mmask8) -1);
+  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
+  0, 0, 2, 2, 4, 4, 6, 6);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-   (__v8df) __W,
-   (__mmask8) __U);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+  (__v8df)_mm512_movedup_pd(__A),
+  (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
 {
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-   (__v8df)
-   _mm512_setzero_pd (),
-   (__mmask8) __U);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+  (__v8df)_mm512_movedup_pd(__A),
+  (__v8df)_mm512_setzero_pd());
 }
 
 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
@@ -8988,53 +8983,47 @@ _mm512_maskz_compress_epi32 (__mmask16 _
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_movehdup_ps (__m512 __A)
 {
-  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
-   (__v16sf)
-   _mm512_undefined_ps (),
-   (__mmask16) -1);
+  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
+ 1, 1, 3, 3, 

r274126 - [X86][SSE2] Updated tests to match llvm\test\CodeGen\X86\sse2-intrinsics-fast-isel-x86_64.ll

2016-06-29 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed Jun 29 09:04:08 2016
New Revision: 274126

URL: http://llvm.org/viewvc/llvm-project?rev=274126&view=rev
Log:
[X86][SSE2] Updated tests to match 
llvm\test\CodeGen\X86\sse2-intrinsics-fast-isel-x86_64.ll

Modified:
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=274126&r1=274125&r2=274126&view=diff
==
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Wed Jun 29 09:04:08 2016
@@ -701,6 +701,14 @@ __m128i test_mm_loadu_si128(__m128i cons
   return _mm_loadu_si128(A);
 }
 
+__m128i test_mm_loadu_si64(void const* A) {
+  // CHECK-LABEL: test_mm_loadu_si64
+  // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
+  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
+  return _mm_loadu_si64(A);
+}
+
 __m128i test_mm_madd_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_madd_epi16
   // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x 
i16> %{{.*}})
@@ -1532,12 +1540,3 @@ __m128i test_mm_xor_si128(__m128i A, __m
   // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
   return _mm_xor_si128(A, B);
 }
-
-__m128i test_mm_loadu_si64(void const* A) {
-  // CHECK-LABEL: test_mm_loadu_si64
-  // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
-  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
-  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
-  return _mm_loadu_si64(A);
-}
-


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D21504: [X86] add _mm_loadu_si64

2016-06-22 Thread Simon Pilgrim via cfe-commits
RKSimon added inline comments.


Comment at: tools/clang/test/CodeGen/sse2-builtins.c:1526
@@ +1525,3 @@
+  // CHECK-LABEL: test_mm_loadu_si64
+  // CHECK: load i64, i64* %__u
+  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0

Please can add the alignment operand to the CHECK (it should be align 1)?


Repository:
  rL LLVM

http://reviews.llvm.org/D21504



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)

2016-06-21 Thread Simon Pilgrim via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - the compile warning is clear and it could be a problem if we allow 
undefined values through on pre-AVX targets.

The only other thing we could do is handle these in CGBuiltin and 'accept' 0-7 
values through on sse/sse2 targets and assert on other values but I don't see 
how this would be better.


http://reviews.llvm.org/D21306



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D21373: [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang.

2016-06-21 Thread Simon Pilgrim via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM



Comment at: lib/Headers/bmiintrin.h:284
@@ -283,2 +283,3 @@
 ///bits in the operand.
+
 static __inline__ unsigned int __RELAXED_FN_ATTRS

Why the newlines? It  doesn't match the rest of the header.


Comment at: lib/Headers/bmiintrin.h:296
@@ +295,3 @@
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X

m_zuckerman wrote:
> We can't use #define Here. The __mm_tzcnt_32(a) intrinsics is deferent from 
> __tzcnt_u32  in the return value. The __mm_tzcnt_32 intrinsic return sign int 
> while the __tzcnt_u32 return unsign value.   
Ah! Missed that bit - thats fine.


http://reviews.llvm.org/D21373



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D21504: [X86] add _mm_loadu_si64

2016-06-19 Thread Simon Pilgrim via cfe-commits
RKSimon added a subscriber: RKSimon.


Comment at: tools/clang/test/CodeGen/sse2-builtins.c:1527
@@ +1526,3 @@
+  // CHECK: load i64, i64* %__u
+  // CHECK: insertelement <2 x i64> undef, i64 %4, i32 0
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1

Replace the hardcoded %4 argument with a general pattern match


Comment at: tools/clang/test/CodeGen/sse2-builtins.c:1530
@@ +1529,3 @@
+  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+  return _mm_loadu_si64(A);

Is the store/load necessary? This appears to be just the -O0 stack behaviour


Repository:
  rL LLVM

http://reviews.llvm.org/D21504



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r273090 - [X86][XOP] Refreshed builtin tests ready for creation of llvm fast-isel tests

2016-06-18 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Sat Jun 18 13:20:14 2016
New Revision: 273090

URL: http://llvm.org/viewvc/llvm-project?rev=273090&view=rev
Log:
[X86][XOP] Refreshed builtin tests ready for creation of llvm fast-isel tests

Modified:
cfe/trunk/test/CodeGen/xop-builtins.c

Modified: cfe/trunk/test/CodeGen/xop-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/xop-builtins.c?rev=273090&r1=273089&r2=273090&view=diff
==
--- cfe/trunk/test/CodeGen/xop-builtins.c (original)
+++ cfe/trunk/test/CodeGen/xop-builtins.c Sat Jun 18 13:20:14 2016
@@ -1,390 +1,393 @@
 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop 
-emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop 
-fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
 
#include <x86intrin.h>
 
+// NOTE: This should match the tests in 
llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
+
 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_maccs_epi16
-  // CHECK: @llvm.x86.xop.vpmacssww
+  // CHECK: call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %{{.*}}, <8 x 
i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_maccs_epi16(a, b, c);
 }
 
 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_macc_epi16
-  // CHECK: @llvm.x86.xop.vpmacsww
+  // CHECK: call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %{{.*}}, <8 x i16> 
%{{.*}}, <8 x i16> %{{.*}})
   return _mm_macc_epi16(a, b, c);
 }
 
 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_maccsd_epi16
-  // CHECK: @llvm.x86.xop.vpmacsswd
+  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %{{.*}}, <8 x 
i16> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_maccsd_epi16(a, b, c);
 }
 
 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_maccd_epi16
-  // CHECK: @llvm.x86.xop.vpmacswd
+  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %{{.*}}, <8 x i16> 
%{{.*}}, <4 x i32> %{{.*}})
   return _mm_maccd_epi16(a, b, c);
 }
 
 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_maccs_epi32
-  // CHECK: @llvm.x86.xop.vpmacssdd
+  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %{{.*}}, <4 x 
i32> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_maccs_epi32(a, b, c);
 }
 
 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_macc_epi32
-  // CHECK: @llvm.x86.xop.vpmacsdd
+  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %{{.*}}, <4 x i32> 
%{{.*}}, <4 x i32> %{{.*}})
   return _mm_macc_epi32(a, b, c);
 }
 
 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_maccslo_epi32
-  // CHECK: @llvm.x86.xop.vpmacssdql
+  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %{{.*}}, <4 x 
i32> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_maccslo_epi32(a, b, c);
 }
 
 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_macclo_epi32
-  // CHECK: @llvm.x86.xop.vpmacsdql
+  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %{{.*}}, <4 x 
i32> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_macclo_epi32(a, b, c);
 }
 
 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_maccshi_epi32
-  // CHECK: @llvm.x86.xop.vpmacssdqh
+  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %{{.*}}, <4 x 
i32> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_maccshi_epi32(a, b, c);
 }
 
 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_macchi_epi32
-  // CHECK: @llvm.x86.xop.vpmacsdqh
+  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %{{.*}}, <4 x 
i32> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_macchi_epi32(a, b, c);
 }
 
 __m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_maddsd_epi16
-  // CHECK: @llvm.x86.xop.vpmadcsswd
+  // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %{{.*}}, <8 x 
i16> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_maddsd_epi16(a, b, c);
 }
 
 __m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) {
   // CHECK-LABEL: test_mm_maddd_epi16
-  // CHECK: @llvm.x86.xop.vpmadcswd
+  // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %{{.*}}, <8 x 
i16> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_maddd_epi16(a, b, c);
 }
 
 __m128i test_mm_haddw_epi8(__m128i a) {
   // CHECK-LABEL: test_mm_haddw_epi8
-  // CHECK: @llvm.x86.xop.vphaddbw
+  // CHECK: call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %{{.*}})
   return _mm_haddw_epi8(a);
 }
 
 __m128i test_mm_haddd_epi8(__m128i a) {
   // CHECK-LABEL: test_mm_haddd_epi8
-  // CHECK: @llvm.x86.xop.vphaddbd
+  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %{{.*}})
   return 

r273086 - [X86][TBM] Refreshed builtin tests ready for creation of llvm fast-isel tests

2016-06-18 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Sat Jun 18 12:09:40 2016
New Revision: 273086

URL: http://llvm.org/viewvc/llvm-project?rev=273086&view=rev
Log:
[X86][TBM] Refreshed builtin tests ready for creation of llvm fast-isel tests

Modified:
cfe/trunk/test/CodeGen/tbm-builtins.c

Modified: cfe/trunk/test/CodeGen/tbm-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/tbm-builtins.c?rev=273086&r1=273085&r2=273086&view=diff
==
--- cfe/trunk/test/CodeGen/tbm-builtins.c (original)
+++ cfe/trunk/test/CodeGen/tbm-builtins.c Sat Jun 18 12:09:40 2016
@@ -8,46 +8,56 @@
 
#include <x86intrin.h>
 
+// NOTE: This should match the tests in 
llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
+
 unsigned int test__bextri_u32(unsigned int a) {
-  // CHECK: call i32 @llvm.x86.tbm.bextri.u32
+  // CHECK-LABEL: test__bextri_u32
+  // CHECK: call i32 @llvm.x86.tbm.bextri.u32(i32 %{{.*}}, i32 1)
   return __bextri_u32(a, 1);
 }
 
 unsigned long long test__bextri_u64(unsigned long long a) {
-  // CHECK: call i64 @llvm.x86.tbm.bextri.u64
+  // CHECK-LABEL: test__bextri_u64
+  // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2)
   return __bextri_u64(a, 2);
 }
 
 unsigned long long test__bextri_u64_bigint(unsigned long long a) {
-  // CHECK: call i64 @llvm.x86.tbm.bextri.u64
+  // CHECK-LABEL: test__bextri_u64_bigint
+  // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887)
   return __bextri_u64(a, 0x7fLL);
 }
 
 unsigned int test__blcfill_u32(unsigned int a) {
+  // CHECK-LABEL: test__blcfill_u32
   // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1
   // CHECK-NEXT: %{{.*}} = and i32 [[TMP]], [[SRC]]
   return __blcfill_u32(a);
 }
 
 unsigned long long test__blcfill_u64(unsigned long long a) {
+  // CHECK-LABEL: test__blcfill_u64
   // CHECK: [[TMPT:%.*]] = add i64 [[SRC:%.*]], 1
   // CHECK-NEXT: %{{.*}} = and i64 [[TMP]], [[SRC]]
   return __blcfill_u64(a);
 }
 
 unsigned int test__blci_u32(unsigned int a) {
+  // CHECK-LABEL: test__blci_u32
   // CHECK: [[TMP:%.*]] = sub i32 -2, [[SRC:%.*]]
   // CHECK-NEXT: %{{.*}} = or i32 [[TMP]], [[SRC]]
   return __blci_u32(a);
 }
 
 unsigned long long test__blci_u64(unsigned long long a) {
+  // CHECK-LABEL: test__blci_u64
   // CHECK: [[TMP:%.*]] = sub i64 -2, [[SRC:%.*]]
   // CHECK-NEXT: %{{.*}} = or i64 [[TMP]], [[SRC]]
   return __blci_u64(a);
 }
 
 unsigned int test__blcic_u32(unsigned int a) {
+  // CHECK-LABEL: test__blcic_u32
   // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1
   // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC]], 1
   // CHECK-NEXT: {{.*}} = and i32 [[TMP2]], [[TMP1]]
@@ -55,6 +65,7 @@ unsigned int test__blcic_u32(unsigned in
 }
 
 unsigned long long test__blcic_u64(unsigned long long a) {
+  // CHECK-LABEL: test__blcic_u64
   // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1
   // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC]], 1
   // CHECK-NEXT: {{.*}} = and i64 [[TMP2]], [[TMP1]]
@@ -62,42 +73,49 @@ unsigned long long test__blcic_u64(unsig
 }
 
 unsigned int test__blcmsk_u32(unsigned int a) {
+  // CHECK-LABEL: test__blcmsk_u32
   // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1
   // CHECK-NEXT: {{.*}} = xor i32 [[TMP]], [[SRC]]
   return __blcmsk_u32(a);
 }
 
 unsigned long long test__blcmsk_u64(unsigned long long a) {
+  // CHECK-LABEL: test__blcmsk_u64
   // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1
   // CHECK-NEXT: {{.*}} = xor i64 [[TMP]], [[SRC]]
   return __blcmsk_u64(a);
 }
 
 unsigned int test__blcs_u32(unsigned int a) {
+  // CHECK-LABEL: test__blcs_u32
   // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1
   // CHECK-NEXT: {{.*}} = or i32 [[TMP]], [[SRC]]
   return __blcs_u32(a);
 }
 
 unsigned long long test__blcs_u64(unsigned long long a) {
+  // CHECK-LABEL: test__blcs_u64
   // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1
   // CHECK-NEXT: {{.*}} = or i64 [[TMP]], [[SRC]]
   return __blcs_u64(a);
 }
 
 unsigned int test__blsfill_u32(unsigned int a) {
+  // CHECK-LABEL: test__blsfill_u32
   // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], -1
   // CHECK-NEXT: {{.*}} = or i32 [[TMP]], [[SRC]]
   return __blsfill_u32(a);
 }
 
 unsigned long long test__blsfill_u64(unsigned long long a) {
+  // CHECK-LABEL: test__blsfill_u64
   // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], -1
   // CHECK-NEXT: {{.*}} = or i64 [[TMP]], [[SRC]]
   return __blsfill_u64(a);
 }
 
 unsigned int test__blsic_u32(unsigned int a) {
+  // CHECK-LABEL: test__blsic_u32
   // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1
   // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC:%.*]], -1
   // CHECK-NEXT: {{.*}} = or i32 [[TMP2]], [[TMP1]]
@@ -105,6 +123,7 @@ unsigned int test__blsic_u32(unsigned in
 }
 
 unsigned long long test__blsic_u64(unsigned long long a) {
+  // CHECK-LABEL: test__blsic_u64
   // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1
   // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC:%.*]], -1
   // CHECK-NEXT: {{.*}} = or i64 [[TMP2]], [[TMP1]]
@@ -112,6 +131,7 @@ unsigned long long 

r273003 - [X86][SSE4A] Use native IR for mask movntsd/movntss intrinsics.

2016-06-17 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri Jun 17 09:28:16 2016
New Revision: 273003

URL: http://llvm.org/viewvc/llvm-project?rev=273003&view=rev
Log:
[X86][SSE4A] Use native IR for mask movntsd/movntss intrinsics.

Depends on llvm side commit r273002.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/sse4a-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=273003&r1=273002&r2=273003&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun 17 09:28:16 2016
@@ -6848,6 +6848,26 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 SI->setAlignment(1);
 return SI;
   }
+  case X86::BI__builtin_ia32_movntsd:
+  case X86::BI__builtin_ia32_movntss: {
+llvm::MDNode *Node = llvm::MDNode::get(
+getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+
+// Extract the 0'th element of the source vector.
+Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
+
+// Convert the type of the pointer to a pointer to the stored type.
+Value *BC = Builder.CreateBitCast(Ops[0],
+llvm::PointerType::getUnqual(Scl->getType()),
+  "cast");
+
+// Unaligned nontemporal store of the scalar value.
+StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
+SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+SI->setAlignment(1);
+return SI;
+  }
+
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
   case X86::BI__builtin_ia32_selectb_512:

Modified: cfe/trunk/test/CodeGen/sse4a-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse4a-builtins.c?rev=273003&r1=273002&r2=273003&view=diff
==
--- cfe/trunk/test/CodeGen/sse4a-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse4a-builtins.c Fri Jun 17 09:28:16 2016
@@ -33,12 +33,14 @@ __m128i test_mm_insert_si64(__m128i x, _
 
 void test_mm_stream_sd(double *p, __m128d a) {
   // CHECK-LABEL: test_mm_stream_sd
-  // CHECK: call void @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> 
%{{[^,]+}})
-  _mm_stream_sd(p, a);
+  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1, !nontemporal
+   _mm_stream_sd(p, a);
 }
 
 void test_mm_stream_ss(float *p, __m128 a) {
   // CHECK-LABEL: test_mm_stream_ss
-  // CHECK: call void @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> 
%{{[^,]+}})
+  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+  // CHECK: store float %{{.*}}, float* %{{.*}}, align 1, !nontemporal
   _mm_stream_ss(p, a);
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D21306: [x86] AVX FP compare builtins should require AVX target feature (PR28112)

2016-06-15 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

It seems like part of the need for this is because the _mm_cmp_ps style 
intrinsics are defined as macros (to get around the problem of trying to use an 
immediate as an argument):

  #define _mm_cmp_ps(a, b, c) __extension__ ({ \
(__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
 (__v4sf)(__m128)(b), (c)); })

which means clang can't use a __target__("avx") attribute to stop their use.

Given that I'm happy with this patch's approach - anyone else have any 
suggestions?


http://reviews.llvm.org/D21306



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D21373: [Clang][bmi][intrinsics] Adding _mm_tzcnt_64 _mm_tzcnt_32 intrinsics to clang.

2016-06-15 Thread Simon Pilgrim via cfe-commits
RKSimon added a subscriber: RKSimon.
RKSimon added a reviewer: RKSimon.


Comment at: lib/Headers/bmiintrin.h:296
@@ -290,1 +295,3 @@
+}
+
 #ifdef __x86_64__

Why not just #define to __tzcnt_u32 like the (many) other duplicate tzcnt 
intrinsics we have:

```
#define _mm_tzcnt_32(a) (__tzcnt_u32((a)))
```

Same for _mm_tzcnt_64

Also, please can you copy/paste/edit the doxygen comment so that its properly 
documented?


http://reviews.llvm.org/D21373



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20358: [Clang][AVX512][Intrinsics]Convert AVX non-temporal store builtins to LLVM-native IR.

2016-06-13 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

http://reviews.llvm.org/D21272 has now been committed, which I think removes 
the need for this patch. http://reviews.llvm.org/D20359 is still needed (with 
the additional tests requested by Craig).


http://reviews.llvm.org/D20358



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r272541 - Fix unused variable warning

2016-06-13 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon Jun 13 05:05:19 2016
New Revision: 272541

URL: http://llvm.org/viewvc/llvm-project?rev=272541&view=rev
Log:
Fix unused variable warning

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=272541&r1=272540&r2=272541&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jun 13 05:05:19 2016
@@ -243,14 +243,14 @@ static Value *EmitSignBit(CodeGenFunctio
 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
 // we need to shift the high bits down to the low before truncating.
 Width >>= 1;
-if (CGF.getTarget().isBigEndian()) {
-  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
-  V = CGF.Builder.CreateLShr(V, ShiftCst);
-} 
-// We are truncating value in order to extract the higher-order 
-// double, which we will be using to extract the sign from.
-IntTy = llvm::IntegerType::get(C, Width);
-V = CGF.Builder.CreateTrunc(V, IntTy);
+if (CGF.getTarget().isBigEndian()) {
+  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
+  V = CGF.Builder.CreateLShr(V, ShiftCst);
+}
+// We are truncating value in order to extract the higher-order
+// double, which we will be using to extract the sign from.
+IntTy = llvm::IntegerType::get(C, Width);
+V = CGF.Builder.CreateTrunc(V, IntTy);
   }
   Value *Zero = llvm::Constant::getNullValue(IntTy);
   return CGF.Builder.CreateICmpSLT(V, Zero);
@@ -1815,13 +1815,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(
 case Builtin::BI__builtin_smull_overflow:
 case Builtin::BI__builtin_smulll_overflow:
   IntrinsicId = llvm::Intrinsic::smul_with_overflow;
-  break;
-}
-
-
-llvm::Value *Carry;
-llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
-Builder.CreateStore(Sum, SumOutPtr);
+  break;
+}
+
+
+llvm::Value *Carry;
+llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
+Builder.CreateStore(Sum, SumOutPtr);
 
 return RValue::get(Carry);
   }
@@ -3569,13 +3569,13 @@ static Value *packTBLDVectorList(CodeGen
  llvm::Type *ResTy, unsigned IntID,
  const char *Name) {
   SmallVector TblOps;
-  if (ExtOp)
-TblOps.push_back(ExtOp);
-
-  // Build a vector containing sequential number like (0, 1, 2, ..., 15)  
-  SmallVector Indices;
-  llvm::VectorType *TblTy = cast(Ops[0]->getType());
-  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
+  if (ExtOp)
+TblOps.push_back(ExtOp);
+
+  // Build a vector containing sequential number like (0, 1, 2, ..., 15)
+  SmallVector Indices;
+  llvm::VectorType *TblTy = cast(Ops[0]->getType());
+  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
 Indices.push_back(2*i);
 Indices.push_back(2*i+1);
   }
@@ -3596,13 +3596,13 @@ static Value *packTBLDVectorList(CodeGen
  ZeroTbl, Indices, Name));
   }
 
-  Function *TblF;
-  TblOps.push_back(IndexOp);
-  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
-  
-  return CGF.EmitNeonCall(TblF, TblOps, Name);
-}
-
+  Function *TblF;
+  TblOps.push_back(IndexOp);
+  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
+
+  return CGF.EmitNeonCall(TblF, TblOps, Name);
+}
+
 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
   unsigned Value;
   switch (BuiltinID) {
@@ -4102,13 +4102,13 @@ Value *CodeGenFunction::EmitARMBuiltinEx
 "vsha1h");
 
   // The ARM _MoveToCoprocessor builtins put the input register value as
-  // the first argument, but the LLVM intrinsic expects it as the third one.
-  case ARM::BI_MoveToCoprocessor:
-  case ARM::BI_MoveToCoprocessor2: {
-Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 
-   Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
-return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
-  Ops[3], Ops[4], Ops[5]});
+  // the first argument, but the LLVM intrinsic expects it as the third one.
+  case ARM::BI_MoveToCoprocessor:
+  case ARM::BI_MoveToCoprocessor2: {
+Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
+   Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
+return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
+  Ops[3], Ops[4], Ops[5]});
   }
   }
 
@@ -6701,27 +6701,26 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 if (Ops.size() == 3)
   return Align;
 
-return EmitX86Select(*this, Ops[4], Align, Ops[3]);
-  }
-
-  case X86::BI__builtin_ia32_movnti:
-  case X86::BI__builtin_ia32_movnti64: {
-llvm::MDNode 

r272540 - [Clang][X86] Convert non-temporal store builtins to generic __builtin_nontemporal_store in headers

2016-06-13 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon Jun 13 04:57:52 2016
New Revision: 272540

URL: http://llvm.org/viewvc/llvm-project?rev=272540&view=rev
Log:
[Clang][X86] Convert non-temporal store builtins to generic 
__builtin_nontemporal_store in headers

We can now use __builtin_nontemporal_store instead of target specific builtins 
for naturally aligned nontemporal stores which avoids the need for handling in 
CGBuiltin.cpp

The scalar integer nontemporal (unaligned) store builtins will have to wait as 
__builtin_nontemporal_store currently assumes natural alignment and doesn't 
accept the 'packed struct' trick that we use for normal unaligned load/stores.

The nontemporal loads require further backend support before we can safely 
convert them to __builtin_nontemporal_load

Differential Revision: http://reviews.llvm.org/D21272

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/xmmintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/builtins-x86.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=272540&r1=272539&r2=272540&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon Jun 13 04:57:52 2016
@@ -313,7 +313,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtss2si64
 TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse")
-TARGET_BUILTIN(__builtin_ia32_movntps, "vf*V4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_sfence, "v", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "", "sse")
 TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "", "sse")
@@ -327,8 +326,6 @@ TARGET_BUILTIN(__builtin_ia32_movmskpd,
 TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_movntpd, "vd*V2d", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_movntdq, "vV2LLi*V2LLi", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "", "sse2")
@@ -493,9 +490,6 @@ TARGET_BUILTIN(__builtin_ia32_vzeroupper
 TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_movntdq256, "vV4LLi*V4LLi", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_movntpd256, "vd*V4d", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_movntps256, "vf*V8f", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "", "avx")
@@ -2154,10 +2148,7 @@ TARGET_BUILTIN(__builtin_ia32_kortestzhi
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movntdq512, "vV8LLi*V8LLi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_movntdqa512, "V8LLiV8LLi*","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movntpd512, "vd*V8d","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movntps512, "vf*V16f","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_palignr512_mask, 
"V64cV64cV64cIiV64cULLi","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_palignr128_mask, 
"V16cV16cV16cIiV16cUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_palignr256_mask, 
"V32cV32cV32cIiV32cUi","","avx512bw,avx512vl")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=272540&r1=272539&r2=272540&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jun 13 04:57:52 2016
@@ -243,14 +243,14 @@ static Value *EmitSignBit(CodeGenFunctio
 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
 // we need to shift the high bits down to the low before truncating.
 Width >>= 1;
-if (CGF.getTarget().isBigEndian()) {
-  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
-  V = CGF.Builder.CreateLShr(V, ShiftCst);
-} 
-// We are truncating value in order to extract the higher-order 
-// double, which we will be using to extract the sign from.
-IntTy = 

Re: [PATCH] D20358: [Clang][AVX512][Intrinsics]Convert AVX non-temporal store builtins to LLVM-native IR.

2016-06-12 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20358#446241, @RKSimon wrote:

> In http://reviews.llvm.org/D20358#446220, @ab wrote:
>
> > In http://reviews.llvm.org/D20358#446218, @ab wrote:
> >
> > > In http://reviews.llvm.org/D20358#446210, @RKSimon wrote:
> > >
> > > > Is there any reason why we can't just get rid of all the SSE movnt 
> > > > builtins and use __builtin_nontemporal_store instead 
> > > > (http://reviews.llvm.org/D12313)?
> > >
> > >
> > > I wanted to suggest that too, but I think you'd have problems with the 
> > > (natural?) alignment requirement of __builtin_nontemporal_store (whereas 
> > > IIRC, movnti & friends accept unaligned pointers).
> >
> >
> > But now that I look at this again, I suppose we could have some 
> > __attribute__((aligned(1))), or something like r271214.
>
>
> True, luckily that only affects _mm_stream_si32 and _mm_stream_si64 - the 
> 'real' vector movnt stores all require type alignment. The _mm_stream_load_* 
> (movntdqa) loads cases should be trivial as well.


I've created http://reviews.llvm.org/D21272 that covers the conversion of 
SSE/SSE2/AVX/AVX512 non-temporal aligned vector stores to use 
__builtin_nontemporal_store in headers


http://reviews.llvm.org/D20358



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D21272: [Clang][X86] Convert non-temporal store builtins to generic __builtin_nontemporal_store in headers

2016-06-12 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: craig.topper, ab, spatel, andreadb.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

As discussed on D20358, we can now use __builtin_nontemporal_store instead of 
target specific builtins for naturally aligned nontemporal stores which avoids 
the need for handling in CGBuiltin.cpp

The scalar integer nontemporal (unaligned) store builtins will have to wait as 
__builtin_nontemporal_store currently assumes natural alignment and doesn't 
accept the 'packed struct' trick that we use for normal unaligned load/stores.

NOTE: The nontemporal loads require further backend support before we can 
safely convert them to __builtin_nontemporal_load

Repository:
  rL LLVM

http://reviews.llvm.org/D21272

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/avx512fintrin.h
  lib/Headers/avxintrin.h
  lib/Headers/emmintrin.h
  lib/Headers/xmmintrin.h
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/builtins-x86.c

Index: test/CodeGen/builtins-x86.c
===
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -300,7 +300,6 @@
   (void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f);
   tmp_i = __builtin_ia32_movmskps(tmp_V4f);
   tmp_i = __builtin_ia32_pmovmskb(tmp_V8c);
-  (void) __builtin_ia32_movntps(tmp_fp, tmp_V4f);
   (void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi);
   (void) __builtin_ia32_sfence();
 
@@ -318,8 +317,6 @@
 #ifdef USE_64
   (void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi);
 #endif
-  (void) __builtin_ia32_movntpd(tmp_dp, tmp_V2d);
-  (void) __builtin_ia32_movntdq(tmp_V2LLip, tmp_V2LLi);
   tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
   tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d);
   tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d);
@@ -446,9 +443,6 @@
   tmp_V4d = __builtin_ia32_vbroadcastf128_pd256(tmp_V2dCp);
   tmp_V8f = __builtin_ia32_vbroadcastf128_ps256(tmp_V4fCp);
   tmp_V32c = __builtin_ia32_lddqu256(tmp_cCp);
-  __builtin_ia32_movntdq256(tmp_V4LLip, tmp_V4LLi);
-  __builtin_ia32_movntpd256(tmp_dp, tmp_V4d);
-  __builtin_ia32_movntps256(tmp_fp, tmp_V8f);
   tmp_V2d = __builtin_ia32_maskloadpd(tmp_V2dCp, tmp_V2LLi);
   tmp_V4f = __builtin_ia32_maskloadps(tmp_V4fCp, tmp_V4i);
   tmp_V4d = __builtin_ia32_maskloadpd256(tmp_V4dCp, tmp_V4LLi);
Index: test/CodeGen/avx512f-builtins.c
===
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -5800,7 +5800,7 @@
 
 void test_mm512_stream_si512(__m512i * __P, __m512i __A) {
   // CHECK-LABEL: @test_mm512_stream_si512
-  // CHECK: @llvm.x86.avx512.storent.q.512
+  // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 64, !nontemporal
   _mm512_stream_si512(__P, __A); 
 }
 
@@ -5812,13 +5812,13 @@
 
 void test_mm512_stream_pd(double *__P, __m512d __A) {
   // CHECK-LABEL: @test_mm512_stream_pd
-  // CHECK: @llvm.x86.avx512.storent.pd.512
+  // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, !nontemporal
   return _mm512_stream_pd(__P, __A); 
 }
 
 void test_mm512_stream_ps(float *__P, __m512 __A) {
   // CHECK-LABEL: @test_mm512_stream_ps
-  // CHECK: @llvm.x86.avx512.storent.ps.512
+  // CHECK: store <16 x float> %{{.*}}, <16 x float>* %{{.*}}, align 64, !nontemporal
   _mm512_stream_ps(__P, __A); 
 }
 
Index: lib/Headers/xmmintrin.h
===
--- lib/Headers/xmmintrin.h
+++ lib/Headers/xmmintrin.h
@@ -2080,7 +2080,7 @@
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_ps(float *__p, __m128 __a)
 {
-  __builtin_ia32_movntps(__p, (__v4sf)__a);
+  __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
 }
 
 /// \brief Forces strong memory ordering (serialization) between store
Index: lib/Headers/emmintrin.h
===
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -2210,13 +2210,13 @@
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_pd(double *__p, __m128d __a)
 {
-  __builtin_ia32_movntpd(__p, (__v2df)__a);
+  __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_si128(__m128i *__p, __m128i __a)
 {
-  __builtin_ia32_movntdq(__p, (__v2di)__a);
+  __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
Index: lib/Headers/avxintrin.h
===
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -2496,19 +2496,19 @@
 static __inline void __DEFAULT_FN_ATTRS
 _mm256_stream_si256(__m256i *__a, __m256i __b)
 {
-  __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
+  __builtin_nontemporal_store((__v4di)__b, (__v4di*)__a);
 }
 
 static __inline void __DEFAULT_FN_ATTRS
 _mm256_stream_pd(double *__a, __m256d __b)
 {
-  

Re: [PATCH] D21268: [x86] translate SSE packed FP comparison builtins to IR

2016-06-12 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

Eeep that's certainly a lot more work than just adding a few extra cases! 
Please add a TODO explaining what we need to do?

If there is a problem with the header documentation please can you raise a 
bugzilla and CC Katya Romanova.


http://reviews.llvm.org/D21268



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D21268: [x86] translate SSE packed FP comparison builtins to IR

2016-06-12 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

Is there any reason that we shouldn't include the avxintrin.h 
__builtin_ia32_cmppd/__builtin_ia32_cmpps/__builtin_ia32_cmppd256/__builtin_ia32_cmpps256
 packed intrinsics in this CGBuiltin.cpp patch? Since we're heading towards 
nixing them anyhow.


http://reviews.llvm.org/D21268



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32

2016-06-05 Thread Simon Pilgrim via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - add test_mm512_setzero_pd() as well if you can.



Comment at: test/CodeGen/avx512f-builtins.c:7291
@@ +7290,3 @@
+
+__m512i test_mm512_setzero_ps()
+{

__m512d test_mm512_setzero_pd() ?


http://reviews.llvm.org/D20871



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32

2016-06-02 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

Can you add tests for the existing _mm512_setzero_* intrinsics as well please?


http://reviews.llvm.org/D20871



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20358: [Clang][AVX512][Intrinsics]Convert AVX non-temporal store builtins to LLVM-native IR.

2016-06-01 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20358#446220, @ab wrote:

> In http://reviews.llvm.org/D20358#446218, @ab wrote:
>
> > In http://reviews.llvm.org/D20358#446210, @RKSimon wrote:
> >
> > > Is there any reason why we can't just get rid of all the SSE movnt 
> > > builtins and use __builtin_nontemporal_store instead 
> > > (http://reviews.llvm.org/D12313)?
> >
> >
> > I wanted to suggest that too, but I think you'd have problems with the 
> > (natural?) alignment requirement of __builtin_nontemporal_store (whereas 
> > IIRC, movnti & friends accept unaligned pointers).
>
>
> But now that I look at this again, I suppose we could have some 
> __attribute__((aligned(1))), or something like r271214.


True, luckily that only affects _mm_stream_si32 and _mm_stream_si64 - the 
'real' vector movnt stores all require type alignment. The _mm_stream_load_* 
(movntdqa) loads cases should be trivial as well.


http://reviews.llvm.org/D20358



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20358: [Clang][AVX512][Intrinsics]Convert AVX non-temporal store builtins to LLVM-native IR.

2016-06-01 Thread Simon Pilgrim via cfe-commits
RKSimon added a subscriber: RKSimon.
RKSimon added a comment.

Is there any reason why we can't just get rid of all the SSE movnt builtins and 
use __builtin_nontemporal_store instead (http://reviews.llvm.org/D12313)?


http://reviews.llvm.org/D20358



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20871: [Clang][AVX512][Intrinsics] Adding two definitions _mm512_setzero and _mm512_setzero_epi32

2016-06-01 Thread Simon Pilgrim via cfe-commits
RKSimon added a subscriber: RKSimon.
RKSimon added a comment.

Tests?


http://reviews.llvm.org/D20871



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r271436 - [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)

2016-06-01 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed Jun  1 16:46:51 2016
New Revision: 271436

URL: http://llvm.org/viewvc/llvm-project?rev=271436&view=rev
Log:
[X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) 
f32/f64 to i32 with generic IR (clang)

The 'cvtt' truncation (round to zero) conversions can be safely represented as 
generic __builtin_convertvector (fptosi) calls instead of x86 intrinsics. We 
already do this (implicitly) for the scalar equivalents.

Note: I looked at updating _mm_cvttpd_epi32 as well but this still requires a 
lot more backend work to correctly lower (both for debug and optimized builds).

Differential Revision: http://reviews.llvm.org/D20859

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/test/CodeGen/avx-builtins.c
cfe/trunk/test/CodeGen/builtins-x86.c
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=271436&r1=271435&r2=271436&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jun  1 16:46:51 2016
@@ -339,7 +339,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttpd2dq,
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
@@ -462,9 +461,7 @@ TARGET_BUILTIN(__builtin_ia32_cmpps256,
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx")

Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=271436&r1=271435&r2=271436&view=diff
==
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Wed Jun  1 16:46:51 2016
@@ -2108,7 +2108,7 @@ _mm256_cvtps_pd(__m128 __a)
 static __inline __m128i __DEFAULT_FN_ATTRS
 _mm256_cvttpd_epi32(__m256d __a)
 {
-  return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
+  return (__m128i)__builtin_convertvector((__v4df) __a, __v4si);
 }
 
 static __inline __m128i __DEFAULT_FN_ATTRS
@@ -2120,7 +2120,7 @@ _mm256_cvtpd_epi32(__m256d __a)
 static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_cvttps_epi32(__m256 __a)
 {
-  return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
+  return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si);
 }
 
 static __inline double __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=271436&r1=271435&r2=271436&view=diff
==
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Wed Jun  1 16:46:51 2016
@@ -1744,7 +1744,7 @@ _mm_cvtps_epi32(__m128 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvttps_epi32(__m128 __a)
 {
-  return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
+  return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si);
 }
 
 /// \brief Returns a vector of [4 x i32] where the lowest element is the input

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=271436&r1=271435&r2=271436&view=diff
==
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Wed Jun  1 16:46:51 2016
@@ -286,13 +286,13 @@ __m256d test_mm256_cvtps_pd(__m128 A) {
 
 __m128i test_mm256_cvttpd_epi32(__m256d A) {
   // CHECK-LABEL: test_mm256_cvttpd_epi32
-  // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}})
+  // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32>
   return _mm256_cvttpd_epi32(A);
 }
 
 __m256i test_mm256_cvttps_epi32(__m256 A) {
   // CHECK-LABEL: test_mm256_cvttps_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}})
+  // CHECK: 

Re: [PATCH] D20859: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)

2016-06-01 Thread Simon Pilgrim via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL271436: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ 
truncating (round to zero)… (authored by RKSimon).

Changed prior to commit:
  http://reviews.llvm.org/D20859?vs=59204&id=59284#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D20859

Files:
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/lib/Headers/avxintrin.h
  cfe/trunk/lib/Headers/emmintrin.h
  cfe/trunk/test/CodeGen/avx-builtins.c
  cfe/trunk/test/CodeGen/builtins-x86.c
  cfe/trunk/test/CodeGen/sse2-builtins.c

Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def
===
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def
@@ -339,7 +339,6 @@
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
@@ -462,9 +461,7 @@
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx")
Index: cfe/trunk/test/CodeGen/avx-builtins.c
===
--- cfe/trunk/test/CodeGen/avx-builtins.c
+++ cfe/trunk/test/CodeGen/avx-builtins.c
@@ -286,13 +286,13 @@
 
 __m128i test_mm256_cvttpd_epi32(__m256d A) {
   // CHECK-LABEL: test_mm256_cvttpd_epi32
-  // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}})
+  // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32>
   return _mm256_cvttpd_epi32(A);
 }
 
 __m256i test_mm256_cvttps_epi32(__m256 A) {
   // CHECK-LABEL: test_mm256_cvttps_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}})
+  // CHECK: fptosi <8 x float> %{{.*}} to <8 x i32>
   return _mm256_cvttps_epi32(A);
 }
 
Index: cfe/trunk/test/CodeGen/builtins-x86.c
===
--- cfe/trunk/test/CodeGen/builtins-x86.c
+++ cfe/trunk/test/CodeGen/builtins-x86.c
@@ -335,7 +335,6 @@
   tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
 #endif
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
-  tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
   (void) __builtin_ia32_lfence();
   (void) __builtin_ia32_mfence();
@@ -415,9 +414,7 @@
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
-  tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
   tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
-  tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
   tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
   tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
Index: cfe/trunk/test/CodeGen/sse2-builtins.c
===
--- cfe/trunk/test/CodeGen/sse2-builtins.c
+++ cfe/trunk/test/CodeGen/sse2-builtins.c
@@ -533,7 +533,7 @@
 
 __m128i test_mm_cvttps_epi32(__m128 A) {
   // CHECK-LABEL: test_mm_cvttps_epi32
-  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
+  // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
   return _mm_cvttps_epi32(A);
 }
 
Index: cfe/trunk/lib/Headers/avxintrin.h
===
--- cfe/trunk/lib/Headers/avxintrin.h
+++ cfe/trunk/lib/Headers/avxintrin.h
@@ -2108,7 +2108,7 @@
 static __inline __m128i __DEFAULT_FN_ATTRS
 _mm256_cvttpd_epi32(__m256d __a)
 {
-  return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
+  return (__m128i)__builtin_convertvector((__v4df) __a, __v4si);
 }
 
 static __inline __m128i __DEFAULT_FN_ATTRS
@@ -2120,7 +2120,7 @@
 static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_cvttps_epi32(__m256 __a)
 {
-  return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
+  return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si);
 }
 
 static __inline double __DEFAULT_FN_ATTRS
Index: cfe/trunk/lib/Headers/emmintrin.h
===
--- cfe/trunk/lib/Headers/emmintrin.h
+++ 

[PATCH] D20859: [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)

2016-06-01 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: ab, mkuper, craig.topper, spatel, andreadb.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

The 'cvtt' truncation (round to zero) conversions can be safely represented as 
generic __builtin_convertvector (fptosi) calls instead of x86 intrinsics.

We already do this (implicitly) for the scalar equivalents.

Note: I looked at updating _mm_cvttpd_epi32 as well but this still requires a 
lot more backend work to correctly lower (both for debug and optimized builds).

Repository:
  rL LLVM

http://reviews.llvm.org/D20859

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avxintrin.h
  lib/Headers/emmintrin.h
  test/CodeGen/avx-builtins.c
  test/CodeGen/builtins-x86.c
  test/CodeGen/sse2-builtins.c

Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -533,7 +533,7 @@
 
 __m128i test_mm_cvttps_epi32(__m128 A) {
   // CHECK-LABEL: test_mm_cvttps_epi32
-  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
+  // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
   return _mm_cvttps_epi32(A);
 }
 
Index: test/CodeGen/builtins-x86.c
===
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -335,7 +335,6 @@
   tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
 #endif
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
-  tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
   (void) __builtin_ia32_lfence();
   (void) __builtin_ia32_mfence();
@@ -415,9 +414,7 @@
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
-  tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
   tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
-  tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
   tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
   tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
Index: test/CodeGen/avx-builtins.c
===
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -286,13 +286,13 @@
 
 __m128i test_mm256_cvttpd_epi32(__m256d A) {
   // CHECK-LABEL: test_mm256_cvttpd_epi32
-  // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}})
+  // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32>
   return _mm256_cvttpd_epi32(A);
 }
 
 __m256i test_mm256_cvttps_epi32(__m256 A) {
   // CHECK-LABEL: test_mm256_cvttps_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}})
+  // CHECK: fptosi <8 x float> %{{.*}} to <8 x i32>
   return _mm256_cvttps_epi32(A);
 }
 
Index: lib/Headers/emmintrin.h
===
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -1744,7 +1744,7 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvttps_epi32(__m128 __a)
 {
-  return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
+  return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si);
 }
 
 /// \brief Returns a vector of [4 x i32] where the lowest element is the input
Index: lib/Headers/avxintrin.h
===
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -2108,7 +2108,7 @@
 static __inline __m128i __DEFAULT_FN_ATTRS
 _mm256_cvttpd_epi32(__m256d __a)
 {
-  return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
+  return (__m128i)__builtin_convertvector((__v4df) __a, __v4si);
 }
 
 static __inline __m128i __DEFAULT_FN_ATTRS
@@ -2120,7 +2120,7 @@
 static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_cvttps_epi32(__m256 __a)
 {
-  return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
+  return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si);
 }
 
 /* Vector replicate */
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -335,7 +335,6 @@
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
@@ -458,9 +457,7 @@
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")

r271219 - [X86][SSE] Added missing tests (merge failure)

2016-05-30 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon May 30 12:58:38 2016
New Revision: 271219

URL: http://llvm.org/viewvc/llvm-project?rev=271219&view=rev
Log:
[X86][SSE] Added missing tests (merge failure)

Differential Revision: http://reviews.llvm.org/D20617

Modified:
cfe/trunk/test/CodeGen/sse-builtins.c

Modified: cfe/trunk/test/CodeGen/sse-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=271219&r1=271218&r2=271219&view=diff
==
--- cfe/trunk/test/CodeGen/sse-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse-builtins.c Mon May 30 12:58:38 2016
@@ -651,8 +651,7 @@ void test_mm_store_ps(float* x, __m128 y
 void test_mm_store_ps1(float* x, __m128 y) {
   // CHECK-LABEL: test_mm_store_ps1
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> 
zeroinitializer
-  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
-  // CHECK-NEXT: ret void
+  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
   _mm_store_ps1(x, y);
 }
 
@@ -666,8 +665,7 @@ void test_mm_store_ss(float* x, __m128 y
 void test_mm_store1_ps(float* x, __m128 y) {
   // CHECK-LABEL: test_mm_store1_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> 
zeroinitializer
-  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
-  // CHECK-NEXT: ret void
+  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
   _mm_store1_ps(x, y);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-30 Thread Simon Pilgrim via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL271218: [X86][SSE] _mm_store1_ps/_mm_store1_pd should 
require an aligned pointer (authored by RKSimon).

Changed prior to commit:
  http://reviews.llvm.org/D20617?vs=58397&id=58979#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D20617

Files:
  cfe/trunk/lib/Headers/emmintrin.h
  cfe/trunk/lib/Headers/xmmintrin.h
  cfe/trunk/test/CodeGen/sse2-builtins.c

Index: cfe/trunk/test/CodeGen/sse2-builtins.c
===
--- cfe/trunk/test/CodeGen/sse2-builtins.c
+++ cfe/trunk/test/CodeGen/sse2-builtins.c
@@ -1205,6 +1205,13 @@
   _mm_store_pd(A, B);
 }
 
+void test_mm_store_pd1(double* x, __m128d y) {
+  // CHECK-LABEL: test_mm_store_pd1
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x 
i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+  _mm_store_pd1(x, y);
+}
+
 void test_mm_store_sd(double* A, __m128d B) {
   // CHECK-LABEL: test_mm_store_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@
 
 void test_mm_store1_pd(double* x, __m128d y) {
   // CHECK-LABEL: test_mm_store1_pd
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x 
i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
   _mm_store1_pd(x, y);
 }
 
Index: cfe/trunk/lib/Headers/emmintrin.h
===
--- cfe/trunk/lib/Headers/emmintrin.h
+++ cfe/trunk/lib/Headers/emmintrin.h
@@ -588,19 +588,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+  *(__m128d*)__dp = __a;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
-  struct __mm_store1_pd_struct {
-double __u[2];
-  } __attribute__((__packed__, __may_alias__));
-  ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
-  ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
+  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
+  _mm_store_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {
-  *(__m128d *)__dp = __a;
+  return _mm_store1_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
Index: cfe/trunk/lib/Headers/xmmintrin.h
===
--- cfe/trunk/lib/Headers/xmmintrin.h
+++ cfe/trunk/lib/Headers/xmmintrin.h
@@ -1593,22 +1593,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store1_ps(float *__p, __m128 __a)
+_mm_store_ps(float *__p, __m128 __a)
 {
-  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
-  _mm_storeu_ps(__p, __a);
+  *(__m128*)__p = __a;
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps1(float *__p, __m128 __a)
+_mm_store1_ps(float *__p, __m128 __a)
 {
-return _mm_store1_ps(__p, __a);
+  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
+  _mm_store_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps(float *__p, __m128 __a)
+_mm_store_ps1(float *__p, __m128 __a)
 {
-  *(__m128 *)__p = __a;
+  return _mm_store1_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS


Index: cfe/trunk/test/CodeGen/sse2-builtins.c
===
--- cfe/trunk/test/CodeGen/sse2-builtins.c
+++ cfe/trunk/test/CodeGen/sse2-builtins.c
@@ -1205,6 +1205,13 @@
   _mm_store_pd(A, B);
 }
 
+void test_mm_store_pd1(double* x, __m128d y) {
+  // CHECK-LABEL: test_mm_store_pd1
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+  _mm_store_pd1(x, y);
+}
+
 void test_mm_store_sd(double* A, __m128d B) {
   // CHECK-LABEL: test_mm_store_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@
 
 void test_mm_store1_pd(double* x, __m128d y) {
   // CHECK-LABEL: test_mm_store1_pd
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
   _mm_store1_pd(x, y);
 }
 
Index: cfe/trunk/lib/Headers/emmintrin.h
===
--- cfe/trunk/lib/Headers/emmintrin.h
+++ cfe/trunk/lib/Headers/emmintrin.h
@@ -588,19 +588,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+  

r271218 - [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-30 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon May 30 12:55:25 2016
New Revision: 271218

URL: http://llvm.org/viewvc/llvm-project?rev=271218&view=rev
Log:
[X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

According to the gcc headers, intel intrinsics docs and msdn codegen the 
_mm_store1_pd (and its _mm_store_pd1 equivalent) should use an aligned pointer 
- the clang headers are the only implementation I can find that assume 
non-aligned stores (by storing with _mm_storeu_pd).

Additionally, according to the intel intrinsics docs and msdn codegen the 
_mm_store1_ps (_mm_store_ps1) requires a similarly aligned pointer.

This patch raises the alignment requirements to match the other implementations 
by calling _mm_store_ps/_mm_store_pd instead.

I've also added the missing _mm_store_pd1 intrinsic (which maps to 
_mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps).

As a followup I'll update the llvm fast-isel tests to match this codegen.

Differential Revision: http://reviews.llvm.org/D20617

Modified:
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/xmmintrin.h
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=271218&r1=271217&r2=271218&view=diff
==
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Mon May 30 12:55:25 2016
@@ -588,19 +588,22 @@ _mm_store_sd(double *__dp, __m128d __a)
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+  *(__m128d*)__dp = __a;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
-  struct __mm_store1_pd_struct {
-double __u[2];
-  } __attribute__((__packed__, __may_alias__));
-  ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
-  ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
+  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
+  _mm_store_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {
-  *(__m128d *)__dp = __a;
+  return _mm_store1_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/xmmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=271218&r1=271217&r2=271218&view=diff
==
--- cfe/trunk/lib/Headers/xmmintrin.h (original)
+++ cfe/trunk/lib/Headers/xmmintrin.h Mon May 30 12:55:25 2016
@@ -1593,22 +1593,22 @@ _mm_storeu_ps(float *__p, __m128 __a)
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store1_ps(float *__p, __m128 __a)
+_mm_store_ps(float *__p, __m128 __a)
 {
-  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
-  _mm_storeu_ps(__p, __a);
+  *(__m128*)__p = __a;
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps1(float *__p, __m128 __a)
+_mm_store1_ps(float *__p, __m128 __a)
 {
-return _mm_store1_ps(__p, __a);
+  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
+  _mm_store_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps(float *__p, __m128 __a)
+_mm_store_ps1(float *__p, __m128 __a)
 {
-  *(__m128 *)__p = __a;
+  return _mm_store1_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=271218&r1=271217&r2=271218&view=diff
==
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Mon May 30 12:55:25 2016
@@ -1205,6 +1205,13 @@ void test_mm_store_pd(double* A, __m128d
   _mm_store_pd(A, B);
 }
 
+void test_mm_store_pd1(double* x, __m128d y) {
+  // CHECK-LABEL: test_mm_store_pd1
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x 
i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+  _mm_store_pd1(x, y);
+}
+
 void test_mm_store_sd(double* A, __m128d B) {
   // CHECK-LABEL: test_mm_store_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@ void test_mm_store_si128(__m128i* A, __m
 
 void test_mm_store1_pd(double* x, __m128d y) {
   // CHECK-LABEL: test_mm_store1_pd
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x 
i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
   _mm_store1_pd(x, y);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org

r271187 - [X86][SSE] Make unsigned integer vector types generally available

2016-05-29 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Sun May 29 13:49:08 2016
New Revision: 271187

URL: http://llvm.org/viewvc/llvm-project?rev=271187&view=rev
Log:
[X86][SSE] Make unsigned integer vector types generally available

As discussed on http://reviews.llvm.org/D20684, move the unsigned integer 
vector types used for zero extension to make them available for general use.


Modified:
cfe/trunk/lib/Headers/avx2intrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/smmintrin.h

Modified: cfe/trunk/lib/Headers/avx2intrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=271187&r1=271186&r2=271187&view=diff
==
--- cfe/trunk/lib/Headers/avx2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx2intrin.h Sun May 29 13:49:08 2016
@@ -402,42 +402,36 @@ _mm256_cvtepi32_epi64(__m128i __V)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi16(__m128i __V)
 {
-  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
   return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi32(__m128i __V)
 {
-  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
   return 
(__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, 
(__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi64(__m128i __V)
 {
-  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
   return 
(__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, 
(__v16qu)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu16_epi32(__m128i __V)
 {
-  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
   return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu16_epi64(__m128i __V)
 {
-  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, 
(__v8hu)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu32_epi64(__m128i __V)
 {
-  typedef unsigned int __v4su __attribute__((__vector_size__(16)));
   return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
 }
 

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=271187&r1=271186&r2=271187&view=diff
==
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Sun May 29 13:49:08 2016
@@ -35,6 +35,12 @@ typedef long long __v2di __attribute__ (
 typedef short __v8hi __attribute__((__vector_size__(16)));
 typedef char __v16qi __attribute__((__vector_size__(16)));
 
+/* Unsigned types */
+typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
+typedef unsigned int __v4su __attribute__((__vector_size__(16)));
+typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+
 /* We need an explicitly signed variant for char. Note that this shouldn't
  * appear in the interface though. */
 typedef signed char __v16qs __attribute__((__vector_size__(16)));

Modified: cfe/trunk/lib/Headers/smmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=271187&r1=271186&r2=271187&view=diff
==
--- cfe/trunk/lib/Headers/smmintrin.h (original)
+++ cfe/trunk/lib/Headers/smmintrin.h Sun May 29 13:49:08 2016
@@ -324,42 +324,36 @@ _mm_cvtepi32_epi64(__m128i __V)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi16(__m128i __V)
 {
-  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
   return 
(__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, 
(__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi32(__m128i __V)
 {
-  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
   return 
(__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, 
(__v16qu)__V, 0, 1, 2, 3), __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi64(__m128i __V)
 {
-  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
   return 
(__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, 
(__v16qu)__V, 0, 1), __v2di);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi32(__m128i __V)
 {
-  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
   return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, 
(__v8hu)__V, 0, 1, 2, 3), __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi64(__m128i __V)
 {

Re: [PATCH] D20359: [LLVM][AVX512][Intrinsics] Convert AVX non-temporal store builtins to LLVM-native IR.

2016-05-28 Thread Simon Pilgrim via cfe-commits
RKSimon added a subscriber: RKSimon.
RKSimon added a comment.

Should AVX512 store support (non-temporal or otherwise) be added to 
X86FastISel::X86FastEmitStore ?


http://reviews.llvm.org/D20359



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20684: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)

2016-05-28 Thread Simon Pilgrim via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL271106: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer 
extension intrinsics with… (authored by RKSimon).

Changed prior to commit:
  http://reviews.llvm.org/D20684?vs=58626&id=58884#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D20684

Files:
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/lib/Headers/avx2intrin.h
  cfe/trunk/lib/Headers/smmintrin.h
  cfe/trunk/test/CodeGen/avx2-builtins.c
  cfe/trunk/test/CodeGen/builtins-x86.c
  cfe/trunk/test/CodeGen/sse41-builtins.c

Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def
===
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def
@@ -382,12 +382,6 @@
 TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbd128, "V4iV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbq128, "V2LLiV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbw128, "V8sV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxdq128, "V2LLiV4i", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwd128, "V4iV8s", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwq128, "V2LLiV8s", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmulld128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "", "sse4.1")
@@ -558,18 +552,6 @@
 TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbw256, "V16sV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbd256, "V8iV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbq256, "V4LLiV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxwd256, "V8iV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxwq256, "V4LLiV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxdq256, "V4LLiV4i", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbw256, "V16sV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbd256, "V8iV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbq256, "V4LLiV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwd256, "V8iV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwq256, "V4LLiV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxdq256, "V4LLiV4i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "", "avx2")
Index: cfe/trunk/test/CodeGen/sse41-builtins.c
===
--- cfe/trunk/test/CodeGen/sse41-builtins.c
+++ cfe/trunk/test/CodeGen/sse41-builtins.c
@@ -119,37 +119,43 @@
 
 __m128i test_mm_cvtepu8_epi16(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi16
-  // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> 
+  // CHECK: zext <8 x i8> {{.*}} to <8 x i16>
   return _mm_cvtepu8_epi16(a);
 }
 
 __m128i test_mm_cvtepu8_epi32(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi32
-  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> 
+  // CHECK: zext <4 x i8> {{.*}} to <4 x i32>
   return _mm_cvtepu8_epi32(a);
 }
 
 __m128i test_mm_cvtepu8_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> 
+  // CHECK: zext <2 x i8> {{.*}} to <2 x i64>
   return _mm_cvtepu8_epi64(a);
 }
 
 __m128i test_mm_cvtepu16_epi32(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu16_epi32
-  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}})
+  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> 
+  // CHECK: zext <4 x i16> {{.*}} to <4 x i32>
   return _mm_cvtepu16_epi32(a);
 }
 
 __m128i test_mm_cvtepu16_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu16_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}})
+  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> 
+  // CHECK: zext <2 x i16> {{.*}} to <2 x i64>
   return _mm_cvtepu16_epi64(a);
 }
 
 __m128i test_mm_cvtepu32_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu32_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
+  // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> 
+  // CHECK: zext <2 x i32> {{.*}} to <2 x i64>
   

r271106 - [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)

2016-05-28 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Sat May 28 03:12:45 2016
New Revision: 271106

URL: http://llvm.org/viewvc/llvm-project?rev=271106&view=rev
Log:
[X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with 
generic IR (clang)

The VPMOVSX and (V)PMOVZX sign/zero extension intrinsics can be safely 
represented as generic __builtin_convertvector calls instead of x86 intrinsics.

This patch removes the clang builtins and their use in the sse2/avx headers - a 
companion patch will remove/auto-upgrade the llvm intrinsics.

Note: We already did this for SSE41 PMOVSX sometime ago.

Differential Revision: http://reviews.llvm.org/D20684

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx2intrin.h
cfe/trunk/lib/Headers/smmintrin.h
cfe/trunk/test/CodeGen/avx2-builtins.c
cfe/trunk/test/CodeGen/builtins-x86.c
cfe/trunk/test/CodeGen/sse41-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=271106&r1=271105&r2=271106&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat May 28 03:12:45 2016
@@ -382,12 +382,6 @@ TARGET_BUILTIN(__builtin_ia32_pminsb128,
 TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbd128, "V4iV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbq128, "V2LLiV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbw128, "V8sV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxdq128, "V2LLiV4i", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwd128, "V4iV8s", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwq128, "V2LLiV8s", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmulld128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "", "sse4.1")
@@ -558,18 +552,6 @@ TARGET_BUILTIN(__builtin_ia32_pminsb256,
 TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbw256, "V16sV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbd256, "V8iV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbq256, "V4LLiV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxwd256, "V8iV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxwq256, "V4LLiV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxdq256, "V4LLiV4i", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbw256, "V16sV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbd256, "V8iV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbq256, "V4LLiV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwd256, "V8iV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwq256, "V4LLiV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxdq256, "V4LLiV4i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "", "avx2")

Modified: cfe/trunk/lib/Headers/avx2intrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=271106&r1=271105&r2=271106&view=diff
==
--- cfe/trunk/lib/Headers/avx2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx2intrin.h Sat May 28 03:12:45 2016
@@ -360,73 +360,85 @@ _mm256_movemask_epi8(__m256i __a)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi16(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+  return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+  return 
(__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, 
(__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+  return 

Re: [PATCH] D20684: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)

2016-05-27 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20684#442514, @ab wrote:

> I'd add the unsigned typedefs with their signed counterparts; no reason not 
> to.
>  With that, LGTM.


Thanks, I'll do that as a follow up commit.


Repository:
  rL LLVM

http://reviews.llvm.org/D20684



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-27 Thread Simon Pilgrim via cfe-commits
RKSimon added inline comments.


Comment at: lib/Headers/emmintrin.h:598
@@ -594,3 +597,3 @@
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {

majnemer wrote:
> You could use `__attribute__((align_value(16)))` no?
Technically yes but AFAICT there are no other users of this approach in the 
headers - is it something that we should be encouraging do you think?

Craig - I think you wrote in a commit about dropping the unaligned intrinsics, 
is that how you'd do it? 


Repository:
  rL LLVM

http://reviews.llvm.org/D20617



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D20684: [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)

2016-05-26 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: mkuper, craig.topper, spatel, andreadb.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

The VPMOVSX and (V)PMOVZX sign/zero extension intrinsics can be safely 
represented as generic __builtin_convertvector calls instead of x86 intrinsics.

This patch removes the clang builtins and their use in the sse2/avx headers - a 
companion patch will remove/auto-upgrade the llvm intrinsics.

Note: We already did this for SSE41 PMOVSX sometime ago.

Repository:
  rL LLVM

http://reviews.llvm.org/D20684

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avx2intrin.h
  lib/Headers/smmintrin.h
  test/CodeGen/avx2-builtins.c
  test/CodeGen/builtins-x86.c
  test/CodeGen/sse41-builtins.c

Index: test/CodeGen/sse41-builtins.c
===
--- test/CodeGen/sse41-builtins.c
+++ test/CodeGen/sse41-builtins.c
@@ -119,37 +119,43 @@
 
 __m128i test_mm_cvtepu8_epi16(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi16
-  // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> 
+  // CHECK: zext <8 x i8> {{.*}} to <8 x i16>
   return _mm_cvtepu8_epi16(a);
 }
 
 __m128i test_mm_cvtepu8_epi32(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi32
-  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> 
+  // CHECK: zext <4 x i8> {{.*}} to <4 x i32>
   return _mm_cvtepu8_epi32(a);
 }
 
 __m128i test_mm_cvtepu8_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> 
+  // CHECK: zext <2 x i8> {{.*}} to <2 x i64>
   return _mm_cvtepu8_epi64(a);
 }
 
 __m128i test_mm_cvtepu16_epi32(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu16_epi32
-  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}})
+  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> 
+  // CHECK: zext <4 x i16> {{.*}} to <4 x i32>
   return _mm_cvtepu16_epi32(a);
 }
 
 __m128i test_mm_cvtepu16_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu16_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}})
+  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> 
+  // CHECK: zext <2 x i16> {{.*}} to <2 x i64>
   return _mm_cvtepu16_epi64(a);
 }
 
 __m128i test_mm_cvtepu32_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu32_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
+  // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> 
+  // CHECK: zext <2 x i32> {{.*}} to <2 x i64>
   return _mm_cvtepu32_epi64(a);
 }
 
Index: test/CodeGen/builtins-x86.c
===
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -387,12 +387,6 @@
   tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i);
   tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i);
   tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s);
-  tmp_V4i = __builtin_ia32_pmovzxbd128(tmp_V16c);
-  tmp_V2LLi = __builtin_ia32_pmovzxbq128(tmp_V16c);
-  tmp_V8s = __builtin_ia32_pmovzxbw128(tmp_V16c);
-  tmp_V2LLi = __builtin_ia32_pmovzxdq128(tmp_V4i);
-  tmp_V4i = __builtin_ia32_pmovzxwd128(tmp_V8s);
-  tmp_V2LLi = __builtin_ia32_pmovzxwq128(tmp_V8s);
   tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i);
   tmp_V4i = __builtin_ia32_pmulld128(tmp_V4i, tmp_V4i);
   tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16);
Index: test/CodeGen/avx2-builtins.c
===
--- test/CodeGen/avx2-builtins.c
+++ test/CodeGen/avx2-builtins.c
@@ -292,73 +292,79 @@
 
 __m256i test_mm256_cvtepi8_epi16(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi8_epi16
-  // CHECK: call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %{{.*}})
+  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
   return _mm256_cvtepi8_epi16(a);
 }
 
 __m256i test_mm256_cvtepi8_epi32(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi8_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %{{.*}})
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i32> 
+  // CHECK: sext <8 x i8> %{{.*}} to <8 x i32>
   return _mm256_cvtepi8_epi32(a);
 }
 
 __m256i test_mm256_cvtepi8_epi64(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi8_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %{{.*}})
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <4 x i32> 
+  // CHECK: sext <4 x i8> %{{.*}} to <4 x i64>
   return _mm256_cvtepi8_epi64(a);
 }
 
 __m256i test_mm256_cvtepi16_epi32(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi16_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %{{.*}})
+  

r270836 - [X86][F16C] Improved f16c intrinsics checks

2016-05-26 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu May 26 05:20:25 2016
New Revision: 270836

URL: http://llvm.org/viewvc/llvm-project?rev=270836&view=rev
Log:
[X86][F16C] Improved f16c intrinsics checks 

Added checks for upper elements being zero'd in scalar conversions

Modified:
cfe/trunk/test/CodeGen/f16c-builtins.c

Modified: cfe/trunk/test/CodeGen/f16c-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/f16c-builtins.c?rev=270836&r1=270835&r2=270836&view=diff
==
--- cfe/trunk/test/CodeGen/f16c-builtins.c (original)
+++ cfe/trunk/test/CodeGen/f16c-builtins.c Thu May 26 05:20:25 2016
@@ -7,36 +7,50 @@
 
 float test_cvtsh_ss(unsigned short a) {
   // CHECK-LABEL: test_cvtsh_ss
-  // CHECK: @llvm.x86.vcvtph2ps.128
+  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
+  // CHECK: call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %{{.*}})
+  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _cvtsh_ss(a);
 }
 
 unsigned short test_cvtss_sh(float a) {
   // CHECK-LABEL: test_cvtss_sh
-  // CHECK: @llvm.x86.vcvtps2ph.128
+  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
+  // CHECK: insertelement <4 x float> %{{.*}}, float 0.00e+00, i32 1
+  // CHECK: insertelement <4 x float> %{{.*}}, float 0.00e+00, i32 2
+  // CHECK: insertelement <4 x float> %{{.*}}, float 0.00e+00, i32 3
+  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
+  // CHECK: extractelement <8 x i16> %{{.*}}, i32 0
   return _cvtss_sh(a, 0);
 }
 
 __m128 test_mm_cvtph_ps(__m128i a) {
   // CHECK-LABEL: test_mm_cvtph_ps
-  // CHECK: @llvm.x86.vcvtph2ps.128
+  // CHECK: call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %{{.*}})
   return _mm_cvtph_ps(a);
 }
 
 __m256 test_mm256_cvtph_ps(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtph_ps
-  // CHECK: @llvm.x86.vcvtph2ps.256
+  // CHECK: call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %{{.*}})
   return _mm256_cvtph_ps(a);
 }
 
 __m128i test_mm_cvtps_ph(__m128 a) {
   // CHECK-LABEL: test_mm_cvtps_ph
-  // CHECK: @llvm.x86.vcvtps2ph.128
+  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
   return _mm_cvtps_ph(a, 0);
 }
 
 __m128i test_mm256_cvtps_ph(__m256 a) {
   // CHECK-LABEL: test_mm256_cvtps_ph
-  // CHECK: @llvm.x86.vcvtps2ph.256
+  // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
   return _mm256_cvtps_ph(a, 0);
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r270833 - [X86][AVX2] Improved checks for float/double mask generation for non-masked gathers

2016-05-26 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu May 26 04:56:50 2016
New Revision: 270833

URL: http://llvm.org/viewvc/llvm-project?rev=270833&view=rev
Log:
[X86][AVX2] Improved checks for float/double mask generation for non-masked 
gathers

Modified:
cfe/trunk/test/CodeGen/avx2-builtins.c

Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=270833&r1=270832&r2=270833&view=diff
==
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Thu May 26 04:56:50 2016
@@ -467,6 +467,7 @@ __m256i test_mm256_mask_i32gather_epi64(
 
 __m128d test_mm_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}}, i8 0)
   // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef, 
i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i32gather_pd(b, c, 2);
 }
@@ -479,6 +480,7 @@ __m128d test_mm_mask_i32gather_pd(__m128
 
 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_pd
+  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, 
<4 x double> %{{.*}}, i8 0)
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> 
undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i32gather_pd(b, c, 2);
 }
@@ -491,6 +493,7 @@ __m256d test_mm256_mask_i32gather_pd(__m
 
 __m128 test_mm_i32gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_ps
+  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x 
float> %{{.*}}, i8 0)
   // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8* 
%{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i32gather_ps(b, c, 2);
 }
@@ -503,6 +506,7 @@ __m128 test_mm_mask_i32gather_ps(__m128
 
 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i32gather_ps
+  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 
x float> %{{.*}}, i8 0)
   // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, 
i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
   return _mm256_i32gather_ps(b, c, 2);
 }
@@ -563,6 +567,7 @@ __m256i test_mm256_mask_i64gather_epi64(
 
 __m128d test_mm_i64gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}}, i8 0)
   // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> undef, 
i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i64gather_pd(b, c, 2);
 }
@@ -575,6 +580,7 @@ __m128d test_mm_mask_i64gather_pd(__m128
 
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_pd
+  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, 
<4 x double> %{{.*}}, i8 0)
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> 
undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i64gather_pd(b, c, 2);
 }
@@ -587,6 +593,7 @@ __m256d test_mm256_mask_i64gather_pd(__m
 
 __m128 test_mm_i64gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_ps
+  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x 
float> %{{.*}}, i8 0)
   // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> undef, i8* 
%{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i64gather_ps(b, c, 2);
 }
@@ -599,6 +606,7 @@ __m128 test_mm_mask_i64gather_ps(__m128
 
 __m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_ps
+  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x 
float> %{{.*}}, i8 0)
   // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, 
i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm256_i64gather_ps(b, c, 2);
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-25 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20617#439200, @craig.topper wrote:

> Can you double check gcc's xmmintrin.h again. I'm pretty sure _mm_store1_ps
>  is calling _mm_storeu_ps.


Yes you're right - for gcc _mm_store1_pd is aligned (and there is a comment 
saying it must be), but _mm_store1_ps is unaligned. The intel intrinsics docs 
and msvc codegen both set both ps and pd versions to aligned store though.

If you wish I can just do the pd fixes - we are alone in doing a extract + 
2*movsd - the rest all use shufpd+movapd

Suggestions for ps?


Repository:
  rL LLVM

http://reviews.llvm.org/D20617



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r270708 - [X86][AVX2] Full set of AVX2 intrinsics tests

2016-05-25 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed May 25 10:10:49 2016
New Revision: 270708

URL: http://llvm.org/viewvc/llvm-project?rev=270708&view=rev
Log:
[X86][AVX2] Full set of AVX2 intrinsics tests

llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll will be synced to this

Modified:
cfe/trunk/test/CodeGen/avx2-builtins.c

Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=270708&r1=270707&r2=270708&view=diff
==
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Wed May 25 10:10:49 2016
@@ -4,179 +4,113 @@
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
 
-#include 
+#include 
 
-__m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
-  // CHECK: @llvm.x86.avx2.mpsadbw({{.*}}, {{.*}}, i8 3)
-  return _mm256_mpsadbw_epu8(x, y, 3);
-}
-
-__m256i test_mm256_sad_epu8(__m256i x, __m256i y) {
-  // CHECK: @llvm.x86.avx2.psad.bw
-  return _mm256_sad_epu8(x, y);
-}
+// NOTE: This should match the tests in 
llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
 
 __m256i test_mm256_abs_epi8(__m256i a) {
-  // CHECK: @llvm.x86.avx2.pabs.b
+  // CHECK-LABEL: test_mm256_abs_epi8
+  // CHECK: call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %{{.*}})
   return _mm256_abs_epi8(a);
 }
 
 __m256i test_mm256_abs_epi16(__m256i a) {
-  // CHECK: @llvm.x86.avx2.pabs.w
+  // CHECK-LABEL: test_mm256_abs_epi16
+  // CHECK: call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %{{.*}})
   return _mm256_abs_epi16(a);
 }
 
 __m256i test_mm256_abs_epi32(__m256i a) {
-  // CHECK: @llvm.x86.avx2.pabs.d
+  // CHECK-LABEL: test_mm256_abs_epi32
+  // CHECK: call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %{{.*}})
   return _mm256_abs_epi32(a);
 }
 
-__m256i test_mm256_packs_epi16(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.packsswb
-  return _mm256_packs_epi16(a, b);
-}
-
-__m256i test_mm256_packs_epi32(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.packssdw
-  return _mm256_packs_epi32(a, b);
-}
-
-__m256i test_mm256_packs_epu16(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.packuswb
-  return _mm256_packus_epi16(a, b);
-}
-
-__m256i test_mm256_packs_epu32(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.packusdw
-  return _mm256_packus_epi32(a, b);
-}
-
 __m256i test_mm256_add_epi8(__m256i a, __m256i b) {
+  // CHECK-LABEL: test_mm256_add_epi8
   // CHECK: add <32 x i8>
   return _mm256_add_epi8(a, b);
 }
 
 __m256i test_mm256_add_epi16(__m256i a, __m256i b) {
+  // CHECK-LABEL: test_mm256_add_epi16
   // CHECK: add <16 x i16>
   return _mm256_add_epi16(a, b);
 }
 
 __m256i test_mm256_add_epi32(__m256i a, __m256i b) {
+  // CHECK-LABEL: test_mm256_add_epi32
   // CHECK: add <8 x i32>
   return _mm256_add_epi32(a, b);
 }
 
 __m256i test_mm256_add_epi64(__m256i a, __m256i b) {
+  // CHECK-LABEL: test_mm256_add_epi64
   // CHECK: add <4 x i64>
   return _mm256_add_epi64(a, b);
 }
 
 __m256i test_mm256_adds_epi8(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.padds.b
+  // CHECK-LABEL: test_mm256_adds_epi8
+  // CHECK: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}}, <32 x i8> 
%{{.*}})
   return _mm256_adds_epi8(a, b);
 }
 
 __m256i test_mm256_adds_epi16(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.padds.w
+  // CHECK-LABEL: test_mm256_adds_epi16
+  // CHECK: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %{{.*}}, <16 x 
i16> %{{.*}})
   return _mm256_adds_epi16(a, b);
 }
 
 __m256i test_mm256_adds_epu8(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.paddus.b
+  // CHECK-LABEL: test_mm256_adds_epu8
+  // CHECK: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}}, <32 x 
i8> %{{.*}})
   return _mm256_adds_epu8(a, b);
 }
 
 __m256i test_mm256_adds_epu16(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.paddus.w
+  // CHECK-LABEL: test_mm256_adds_epu16
+  // CHECK: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %{{.*}}, <16 x 
i16> %{{.*}})
   return _mm256_adds_epu16(a, b);
 }
 
 __m256i test_mm256_alignr_epi8(__m256i a, __m256i b) {
+  // CHECK-LABEL: test_mm256_alignr_epi8
   // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> 

   return _mm256_alignr_epi8(a, b, 2);
 }
 
 __m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) {
+  // CHECK-LABEL: test2_mm256_alignr_epi8
   // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x 
i32> 
   return _mm256_alignr_epi8(a, b, 17);
 }
 
-__m256i test_mm256_sub_epi8(__m256i a, __m256i b) {
-  // CHECK: sub <32 x i8>
-  return _mm256_sub_epi8(a, b);
-}
-
-__m256i test_mm256_sub_epi16(__m256i a, __m256i b) {
-  // CHECK: sub <16 x i16>
-  return _mm256_sub_epi16(a, b);
-}
-
-__m256i test_mm256_sub_epi32(__m256i a, __m256i b) {
-  // CHECK: sub <8 x i32>
-  return _mm256_sub_epi32(a, b);
-}
-
-__m256i test_mm256_sub_epi64(__m256i a, __m256i b) {
-  // CHECK: sub <4 x i64>
-  return _mm256_sub_epi64(a, b);
-}
-

[PATCH] D20617: [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

2016-05-25 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: craig.topper, spatel, andreadb.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

According to the gcc headers, intel intrinsics docs and msdn codegen the 
_mm_store1_ps/_mm_store1_pd (and their _mm_store_ps1/_mm_store_pd1 analogues) 
should require an aligned pointer - the clang headers are the only 
implementation I can find that assume non-aligned stores (by storing with 
_mm_storeu_ps/_mm_storeu_pd).

This patch raises the alignment requirements to match the other implementations 
by calling _mm_store_ps/_mm_store_pd instead.

I've also added the missing _mm_store_pd1 intrinsic (which maps to 
_mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps).

As a followup I'll update the llvm fast-isel tests to match this codegen.

Repository:
  rL LLVM

http://reviews.llvm.org/D20617

Files:
  lib/Headers/emmintrin.h
  lib/Headers/xmmintrin.h
  test/CodeGen/sse-builtins.c
  test/CodeGen/sse2-builtins.c

Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -1205,6 +1205,13 @@
   _mm_store_pd(A, B);
 }
 
+void test_mm_store_pd1(double* x, __m128d y) {
+  // CHECK-LABEL: test_mm_store_pd1
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+  _mm_store_pd1(x, y);
+}
+
 void test_mm_store_sd(double* A, __m128d B) {
   // CHECK-LABEL: test_mm_store_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@
 
 void test_mm_store1_pd(double* x, __m128d y) {
   // CHECK-LABEL: test_mm_store1_pd
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
   _mm_store1_pd(x, y);
 }
 
Index: test/CodeGen/sse-builtins.c
===
--- test/CodeGen/sse-builtins.c
+++ test/CodeGen/sse-builtins.c
@@ -651,7 +651,7 @@
 void test_mm_store_ps1(float* x, __m128 y) {
   // CHECK-LABEL: test_mm_store_ps1
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
-  // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
   _mm_store_ps1(x, y);
 }
 
@@ -665,7 +665,7 @@
 void test_mm_store1_ps(float* x, __m128 y) {
   // CHECK-LABEL: test_mm_store1_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
-  // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
   _mm_store1_ps(x, y);
 }
 
Index: lib/Headers/xmmintrin.h
===
--- lib/Headers/xmmintrin.h
+++ lib/Headers/xmmintrin.h
@@ -1590,22 +1590,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store1_ps(float *__p, __m128 __a)
+_mm_store_ps(float *__p, __m128 __a)
 {
-  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
-  _mm_storeu_ps(__p, __a);
+  *(__m128 *)__p = __a;
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps1(float *__p, __m128 __a)
+_mm_store1_ps(float *__p, __m128 __a)
 {
-return _mm_store1_ps(__p, __a);
+  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
+  _mm_store_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps(float *__p, __m128 __a)
+_mm_store_ps1(float *__p, __m128 __a)
 {
-  *(__m128 *)__p = __a;
+  return _mm_store1_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
Index: lib/Headers/emmintrin.h
===
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -582,19 +582,22 @@
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+  *(__m128d *)__dp = __a;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
-  struct __mm_store1_pd_struct {
-double __u[2];
-  } __attribute__((__packed__, __may_alias__));
-  ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
-  ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
+  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
+  _mm_store_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {
-  *(__m128d *)__dp = __a;
+  return _mm_store1_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
___

r270679 - [X86][SSE] Updated _mm_store_ps1 test to match _mm_store1_ps

2016-05-25 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Wed May 25 04:20:08 2016
New Revision: 270679

URL: http://llvm.org/viewvc/llvm-project?rev=270679&view=rev
Log:
[X86][SSE] Updated _mm_store_ps1 test to match _mm_store1_ps

Modified:
cfe/trunk/test/CodeGen/sse-builtins.c

Modified: cfe/trunk/test/CodeGen/sse-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=270679&r1=270678&r2=270679&view=diff
==
--- cfe/trunk/test/CodeGen/sse-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse-builtins.c Wed May 25 04:20:08 2016
@@ -651,7 +651,7 @@ void test_mm_store_ps(float* x, __m128 y
 void test_mm_store_ps1(float* x, __m128 y) {
   // CHECK-LABEL: test_mm_store_ps1
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> 
zeroinitializer
-  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
+  // CHECK: call void @llvm.x86.sse.storeu.ps(i8* %{{.*}}, <4 x float> %{{.*}})
   _mm_store_ps1(x, y);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

2016-05-23 Thread Simon Pilgrim via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL270499: [X86][SSE] Replace lossless i32/f32 to f64 
conversion intrinsics with generic IR (authored by RKSimon).

Changed prior to commit:
  http://reviews.llvm.org/D20528?vs=58146&id=58160#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D20528

Files:
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/lib/Headers/avxintrin.h
  cfe/trunk/lib/Headers/emmintrin.h
  cfe/trunk/test/CodeGen/avx-builtins.c
  cfe/trunk/test/CodeGen/builtins-x86.c
  cfe/trunk/test/CodeGen/sse2-builtins.c
  cfe/trunk/test/CodeGen/target-builtin-error-2.c

Index: cfe/trunk/test/CodeGen/target-builtin-error-2.c
===
--- cfe/trunk/test/CodeGen/target-builtin-error-2.c
+++ cfe/trunk/test/CodeGen/target-builtin-error-2.c
@@ -5,9 +5,9 @@
 
 // Since we do code generation on a function level this needs to error out since
 // the subtarget feature won't be available.
-__m256d wombat(__m128i a) {
+__m128 wombat(__m128i a) {
   if (__builtin_cpu_supports("avx"))
-return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}}
+return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}}
   else
-return (__m256d){0, 0, 0, 0};
+return (__m128){0, 0};
 }
Index: cfe/trunk/test/CodeGen/sse2-builtins.c
===
--- cfe/trunk/test/CodeGen/sse2-builtins.c
+++ cfe/trunk/test/CodeGen/sse2-builtins.c
@@ -415,7 +415,8 @@
 
 __m128d test_mm_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm_cvtepi32_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}})
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> 
+  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
   return _mm_cvtepi32_pd(A);
 }
 
@@ -445,7 +446,8 @@
 
 __m128d test_mm_cvtps_pd(__m128 A) {
   // CHECK-LABEL: test_mm_cvtps_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}})
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> 
+  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
   return _mm_cvtps_pd(A);
 }
 
Index: cfe/trunk/test/CodeGen/avx-builtins.c
===
--- cfe/trunk/test/CodeGen/avx-builtins.c
+++ cfe/trunk/test/CodeGen/avx-builtins.c
@@ -250,7 +250,7 @@
 
 __m256d test_mm256_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm256_cvtepi32_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}})
+  // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double>
   return _mm256_cvtepi32_pd(A);
 }
 
@@ -280,7 +280,7 @@
 
 __m256d test_mm256_cvtps_pd(__m128 A) {
   // CHECK-LABEL: test_mm256_cvtps_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}})
+  // CHECK: fpext <4 x float> %{{.*}} to <4 x double>
   return _mm256_cvtps_pd(A);
 }
 
Index: cfe/trunk/test/CodeGen/builtins-x86.c
===
--- cfe/trunk/test/CodeGen/builtins-x86.c
+++ cfe/trunk/test/CodeGen/builtins-x86.c
@@ -325,7 +325,6 @@
   tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
   tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d);
   tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d);
-  tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i);
   tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i);
   tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d);
   tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d);
@@ -338,7 +337,6 @@
   tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
 #endif
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
-  tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
   (void) __builtin_ia32_lfence();
@@ -423,11 +421,9 @@
   tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
   tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
-  tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i);
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
-  tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
   tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
Index: cfe/trunk/lib/Headers/avxintrin.h
===
--- cfe/trunk/lib/Headers/avxintrin.h
+++ cfe/trunk/lib/Headers/avxintrin.h
@@ -2050,7 +2050,7 @@
 static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_pd(__m128i __a)
 {
-  return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
+  return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
 }
 
 /// \brief Converts a 

r270499 - [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

2016-05-23 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Mon May 23 17:13:02 2016
New Revision: 270499

URL: http://llvm.org/viewvc/llvm-project?rev=270499&view=rev
Log:
[X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

Both the (V)CVTDQ2PD(Y) (i32 to f64) and (V)CVTPS2PD(Y) (f32 to f64) conversion 
instructions are lossless and can be safely represented as generic 
__builtin_convertvector calls instead of x86 intrinsics without affecting final 
codegen.

This patch removes the clang builtins and their use in the sse2/avx headers - a 
future patch will deal with removing the llvm intrinsics, but that will require 
a bit more work.

Differential Revision: http://reviews.llvm.org/D20528

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/test/CodeGen/avx-builtins.c
cfe/trunk/test/CodeGen/builtins-x86.c
cfe/trunk/test/CodeGen/sse2-builtins.c
cfe/trunk/test/CodeGen/target-builtin-error-2.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=270499&r1=270498&r2=270499&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 23 17:13:02 2016
@@ -330,7 +330,6 @@ TARGET_BUILTIN(__builtin_ia32_movntdq, "
 TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd, "V2dV4i", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps, "V4fV4i", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2")
@@ -338,7 +337,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttpd2dq,
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd, "V2dV4f", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
 TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
@@ -466,11 +464,9 @@ TARGET_BUILTIN(__builtin_ia32_blendvps25
 TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2pd256, "V4dV4i", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd256, "V4dV4f", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx")

Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=270499&r1=270498&r2=270499&view=diff
==
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Mon May 23 17:13:02 2016
@@ -2050,7 +2050,7 @@ _mm256_insert_epi64(__m256i __a, long lo
 static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_pd(__m128i __a)
 {
-  return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
+  return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
 }
 
 /// \brief Converts a vector of [8 x i32] into a vector of [8 x float].
@@ -2102,7 +2102,7 @@ _mm256_cvtps_epi32(__m256 __a)
 static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_cvtps_pd(__m128 __a)
 {
-  return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
+  return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
 }
 
 static __inline __m128i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=270499&r1=270498&r2=270499&view=diff
==
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Mon May 23 17:13:02 2016
@@ -386,13 +386,15 @@ _mm_cvtpd_ps(__m128d __a)
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtps_pd(__m128 __a)
 {
-  return __builtin_ia32_cvtps2pd((__v4sf)__a);
+  return (__m128d) __builtin_convertvector(
+  __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtepi32_pd(__m128i __a)
 {
-  return __builtin_ia32_cvtdq2pd((__v4si)__a);
+  return (__m128d) __builtin_convertvector(
+  

Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

2016-05-23 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20528#437165, @mkuper wrote:

> Presumably, the fast-isel lowering of the IR pattern is already correct, and 
> in any case, it isn't affected by this patch.
>  I just want to make sure we don't regress the optimized DAG codegen - that 
> is, it still produces the instruction we'd expect from the intrinsic (or 
> something at least as good).


The existing llvm\test\CodeGen\X86\vec_fpext.ll and 
llvm\test\CodeGen\X86\vec_int_to_fp.ll already demonstrate the correct 
optimized DAG codegen using the same IR as output in the 
clang\test\CodeGen\*-builtins.c here.

Also, the aim is to keep the llvm\test\CodeGen\X86\*-intrinsics-fast-isel.ll 
tests in sync with the llvm\tools\clang\test\CodeGen\*-builtins.c equivalents.


Repository:
  rL LLVM

http://reviews.llvm.org/D20528



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

2016-05-23 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20528#437117, @mkuper wrote:

> Sorry, I didn't intend to imply the rest of the llvm work is necessary for 
> this to go in. Just that I'd be happier with this patch knowing that we have 
> a regression test for doing the (shuffle + fpext, say) lowering correctly. I 
> didn't even mean fast-isel, only the DAG.


The fast-isel tests are the most self contained (and are useful to show the 
non-optimized codegen for every intrinsic in the headers). I can submit them 
now if you wish.


Repository:
  rL LLVM

http://reviews.llvm.org/D20528



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

2016-05-23 Thread Simon Pilgrim via cfe-commits
RKSimon updated this revision to Diff 58146.

Repository:
  rL LLVM

http://reviews.llvm.org/D20528

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avxintrin.h
  lib/Headers/emmintrin.h
  test/CodeGen/avx-builtins.c
  test/CodeGen/builtins-x86.c
  test/CodeGen/sse2-builtins.c
  test/CodeGen/target-builtin-error-2.c

Index: test/CodeGen/target-builtin-error-2.c
===
--- test/CodeGen/target-builtin-error-2.c
+++ test/CodeGen/target-builtin-error-2.c
@@ -5,9 +5,9 @@
 
 // Since we do code generation on a function level this needs to error out since
 // the subtarget feature won't be available.
-__m256d wombat(__m128i a) {
+__m128 wombat(__m128i a) {
   if (__builtin_cpu_supports("avx"))
-return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}}
+return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}}
   else
-return (__m256d){0, 0, 0, 0};
+return (__m128){0, 0};
 }
Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -415,7 +415,8 @@
 
 __m128d test_mm_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm_cvtepi32_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}})
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
   return _mm_cvtepi32_pd(A);
 }
 
@@ -445,7 +446,8 @@
 
 __m128d test_mm_cvtps_pd(__m128 A) {
   // CHECK-LABEL: test_mm_cvtps_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}})
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
   return _mm_cvtps_pd(A);
 }
 
Index: test/CodeGen/builtins-x86.c
===
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -325,7 +325,6 @@
   tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
   tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d);
   tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d);
-  tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i);
   tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i);
   tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d);
   tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d);
@@ -338,7 +337,6 @@
   tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
 #endif
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
-  tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
   (void) __builtin_ia32_lfence();
@@ -423,11 +421,9 @@
   tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
   tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
-  tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i);
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
-  tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
   tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
Index: test/CodeGen/avx-builtins.c
===
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -250,7 +250,7 @@
 
 __m256d test_mm256_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm256_cvtepi32_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}})
+  // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double>
   return _mm256_cvtepi32_pd(A);
 }
 
@@ -280,7 +280,7 @@
 
 __m256d test_mm256_cvtps_pd(__m128 A) {
   // CHECK-LABEL: test_mm256_cvtps_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}})
+  // CHECK: fpext <4 x float> %{{.*}} to <4 x double>
   return _mm256_cvtps_pd(A);
 }
 
Index: lib/Headers/emmintrin.h
===
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -386,13 +386,15 @@
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtps_pd(__m128 __a)
 {
-  return __builtin_ia32_cvtps2pd((__v4sf)__a);
+  return (__m128d) __builtin_convertvector(
+  __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtepi32_pd(__m128i __a)
 {
-  return __builtin_ia32_cvtdq2pd((__v4si)__a);
+  return (__m128d) __builtin_convertvector(
+  __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
Index: lib/Headers/avxintrin.h
===
--- lib/Headers/avxintrin.h
+++ 

Re: [PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

2016-05-23 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20528#436893, @mkuper wrote:

> This looks right, but we may lose some end-to-end tests, since right now we 
> have a clang-level test that checks the builtin is lowered to the intrinsic, 
> and (hopefully) a CG-level test that the intrinsic is lowered to the correct 
> instruction.
>  Do you know if there are already CG tests that check we correctly lower 
> these IR patterns to CVTPS2PD, etc? If not, could you add them?


I do have the relevant changes for 
llvm\test\CodeGen\X86\sse2-intrinsics-fast-isel.ll and 
llvm\test\CodeGen\X86\avx-intrinsics-fast-isel.ll (I spent most of last week 
adding them all.). Do you want me to setup a separate llvm patch for 
review? I'm not ready to do the rest of the llvm work (removal of the llvm 
intrinsics / auto-upgrade etc.). but the fast-isel changes are very simple.



Comment at: lib/Headers/emmintrin.h:390
@@ -390,1 +389,3 @@
+  return (__m128d) __builtin_convertvector(
+  __builtin_shufflevector((__v4sf __a, (__v4sf)__a, 0, 1), __v2df);
 }

mkuper wrote:
> It looks like there's a missing paren after the first __v4sf.
> How does the test compile? Or am I misreading?
Sorry, that's me 'fixing' clang-format which I stupidly forgot to run until 
just before submission.


Repository:
  rL LLVM

http://reviews.llvm.org/D20528



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D20528: [X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

2016-05-23 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: qcolombet, craig.topper, mkuper, andreadb, spatel.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

Both the (V)CVTDQ2PD(Y) (i32 to f64) and (V)CVTPS2PD(Y) (f32 to f64) conversion 
instructions are lossless and can be safely represented as generic 
__builtin_convertvector calls instead of x86 intrinsics.

This patch removes the clang builtins and their use in the sse2/avx headers - a 
future patch will deal with removing the llvm intrinsics, but that will require 
a bit more work.

Repository:
  rL LLVM

http://reviews.llvm.org/D20528

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avxintrin.h
  lib/Headers/emmintrin.h
  test/CodeGen/avx-builtins.c
  test/CodeGen/builtins-x86.c
  test/CodeGen/sse2-builtins.c
  test/CodeGen/target-builtin-error-2.c

Index: test/CodeGen/target-builtin-error-2.c
===
--- test/CodeGen/target-builtin-error-2.c
+++ test/CodeGen/target-builtin-error-2.c
@@ -5,9 +5,9 @@
 
 // Since we do code generation on a function level this needs to error out since
 // the subtarget feature won't be available.
-__m256d wombat(__m128i a) {
+__m128 wombat(__m128i a) {
   if (__builtin_cpu_supports("avx"))
-return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}}
+return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}}
   else
-return (__m256d){0, 0, 0, 0};
+return (__m128){0, 0};
 }
Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -415,7 +415,8 @@
 
 __m128d test_mm_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm_cvtepi32_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}})
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
   return _mm_cvtepi32_pd(A);
 }
 
@@ -445,7 +446,8 @@
 
 __m128d test_mm_cvtps_pd(__m128 A) {
   // CHECK-LABEL: test_mm_cvtps_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}})
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
   return _mm_cvtps_pd(A);
 }
 
Index: test/CodeGen/builtins-x86.c
===
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -325,7 +325,6 @@
   tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
   tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d);
   tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d);
-  tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i);
   tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i);
   tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d);
   tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d);
@@ -338,7 +337,6 @@
   tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
 #endif
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
-  tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
   (void) __builtin_ia32_lfence();
@@ -423,11 +421,9 @@
   tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
   tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
-  tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i);
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
-  tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
   tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
Index: test/CodeGen/avx-builtins.c
===
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -250,7 +250,7 @@
 
 __m256d test_mm256_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm256_cvtepi32_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}})
+  // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double>
   return _mm256_cvtepi32_pd(A);
 }
 
@@ -280,7 +280,7 @@
 
 __m256d test_mm256_cvtps_pd(__m128 A) {
   // CHECK-LABEL: test_mm256_cvtps_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}})
+  // CHECK: fpext <4 x float> %{{.*}} to <4 x double>
   return _mm256_cvtps_pd(A);
 }
 
Index: lib/Headers/emmintrin.h
===
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -386,13 +386,15 @@
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtps_pd(__m128 __a)
 {
-  return __builtin_ia32_cvtps2pd((__v4sf)__a);
+  return (__m128d) 

Re: [PATCH] D20468: [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16

2016-05-21 Thread Simon Pilgrim via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL270330: [X86][AVX] Ensure zero-extension of 
_mm256_extract_epi8 and _mm256_extract_epi16 (authored by RKSimon).

Changed prior to commit:
  http://reviews.llvm.org/D20468?vs=57927&id=58045#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D20468

Files:
  cfe/trunk/lib/Headers/avxintrin.h
  cfe/trunk/test/CodeGen/avx-builtins.c

Index: cfe/trunk/test/CodeGen/avx-builtins.c
===
--- cfe/trunk/test/CodeGen/avx-builtins.c
+++ cfe/trunk/test/CodeGen/avx-builtins.c
@@ -314,21 +314,19 @@
   return _mm256_dp_ps(A, B, 7);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi8(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi8
   // CHECK: and i32 %{{.*}}, 31
   // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i8 %{{.*}} to i32
+  // CHECK: zext i8 %{{.*}} to i32
   return _mm256_extract_epi8(A, 32);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi16(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi16
   // CHECK: and i32 %{{.*}}, 15
   // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i16 %{{.*}} to i32
+  // CHECK: zext i16 %{{.*}} to i32
   return _mm256_extract_epi16(A, 16);
 }
 
Index: cfe/trunk/lib/Headers/avxintrin.h
===
--- cfe/trunk/lib/Headers/avxintrin.h
+++ cfe/trunk/lib/Headers/avxintrin.h
@@ -1875,13 +1875,13 @@
 /// \param __imm
 ///An immediate integer operand with bits [3:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 16 bits of extended
+/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
 ///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi16(__m256i __a, const int __imm)
 {
   __v16hi __b = (__v16hi)__a;
-  return __b[__imm & 15];
+  return (unsigned short)__b[__imm & 15];
 }
 
 /// \brief Takes a [32 x i8] vector and returns the vector element value
@@ -1897,13 +1897,13 @@
 /// \param __imm
 ///An immediate integer operand with bits [4:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 8 bits of extended 
packed
-///data.
+/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
+///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi8(__m256i __a, const int __imm)
 {
   __v32qi __b = (__v32qi)__a;
-  return __b[__imm & 31];
+  return (unsigned char)__b[__imm & 31];
 }
 
 #ifdef __x86_64__


Index: cfe/trunk/test/CodeGen/avx-builtins.c
===
--- cfe/trunk/test/CodeGen/avx-builtins.c
+++ cfe/trunk/test/CodeGen/avx-builtins.c
@@ -314,21 +314,19 @@
   return _mm256_dp_ps(A, B, 7);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi8(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi8
   // CHECK: and i32 %{{.*}}, 31
   // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i8 %{{.*}} to i32
+  // CHECK: zext i8 %{{.*}} to i32
   return _mm256_extract_epi8(A, 32);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi16(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi16
   // CHECK: and i32 %{{.*}}, 15
   // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i16 %{{.*}} to i32
+  // CHECK: zext i16 %{{.*}} to i32
   return _mm256_extract_epi16(A, 16);
 }
 
Index: cfe/trunk/lib/Headers/avxintrin.h
===
--- cfe/trunk/lib/Headers/avxintrin.h
+++ cfe/trunk/lib/Headers/avxintrin.h
@@ -1875,13 +1875,13 @@
 /// \param __imm
 ///An immediate integer operand with bits [3:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 16 bits of extended
+/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
 ///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi16(__m256i __a, const int __imm)
 {
   __v16hi __b = (__v16hi)__a;
-  return __b[__imm & 15];
+  return (unsigned short)__b[__imm & 15];
 }
 
 /// \brief Takes a [32 x i8] vector and returns the vector element value
@@ -1897,13 +1897,13 @@
 /// \param __imm
 ///An immediate integer operand with bits [4:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 8 bits of extended packed
-///data.
+/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
+///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi8(__m256i __a, const int __imm)
 {
   __v32qi __b = (__v32qi)__a;
-  return __b[__imm & 31];
+  return (unsigned char)__b[__imm & 31];
 }
 
 #ifdef 

r270330 - [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16

2016-05-21 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Sat May 21 16:14:35 2016
New Revision: 270330

URL: http://llvm.org/viewvc/llvm-project?rev=270330&view=rev
Log:
[X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16

Ensure _mm256_extract_epi8 and _mm256_extract_epi16 zero extend their i8/i16 
result to i32. This matches _mm_extract_epi8 and _mm_extract_epi16.

Fix for PR27594

Differential Revision: http://reviews.llvm.org/D20468

Modified:
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/test/CodeGen/avx-builtins.c

Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=270330&r1=270329&r2=270330&view=diff
==
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Sat May 21 16:14:35 2016
@@ -1875,13 +1875,13 @@ _mm256_extract_epi32(__m256i __a, const
 /// \param __imm
 ///An immediate integer operand with bits [3:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 16 bits of extended
+/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
 ///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi16(__m256i __a, const int __imm)
 {
   __v16hi __b = (__v16hi)__a;
-  return __b[__imm & 15];
+  return (unsigned short)__b[__imm & 15];
 }
 
 /// \brief Takes a [32 x i8] vector and returns the vector element value
@@ -1897,13 +1897,13 @@ _mm256_extract_epi16(__m256i __a, const
 /// \param __imm
 ///An immediate integer operand with bits [4:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 8 bits of extended 
packed
-///data.
+/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
+///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi8(__m256i __a, const int __imm)
 {
   __v32qi __b = (__v32qi)__a;
-  return __b[__imm & 31];
+  return (unsigned char)__b[__imm & 31];
 }
 
 #ifdef __x86_64__

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=270330&r1=270329&r2=270330&view=diff
==
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Sat May 21 16:14:35 2016
@@ -314,21 +314,19 @@ __m256 test_mm256_dp_ps(__m256 A, __m256
   return _mm256_dp_ps(A, B, 7);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi8(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi8
   // CHECK: and i32 %{{.*}}, 31
   // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i8 %{{.*}} to i32
+  // CHECK: zext i8 %{{.*}} to i32
   return _mm256_extract_epi8(A, 32);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi16(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi16
   // CHECK: and i32 %{{.*}}, 15
   // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i16 %{{.*}} to i32
+  // CHECK: zext i16 %{{.*}} to i32
   return _mm256_extract_epi16(A, 16);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20468: [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16

2016-05-20 Thread Simon Pilgrim via cfe-commits
RKSimon added a comment.

In http://reviews.llvm.org/D20468#435522, @mkuper wrote:

> Could you point me to where in the documentation it says they must be 
> zero-extended?
>  The Intel intrinsics guide actually has them with shorter return types:
>
>   __int8 _mm256_extract_epi8 (__m256i a, const int index)
>   __int16 _mm256_extract_epi16 (__m256i a, const int index)


And the gcc version has them wrapped to the _mm_extract_epi* intrinsics which 
map to the real 128-bit instructions which do zero-extend.

I'm open to changing the return types in the headers instead, but really I'd 
expect the mm256 versions to zero extend like the older mm versions.


Repository:
  rL LLVM

http://reviews.llvm.org/D20468



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r270227 - [X86][AVX] Added _mm256_testc_si256/_mm256_testnzc_si256/_mm256_testz_si256 tests

2016-05-20 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri May 20 10:49:17 2016
New Revision: 270227

URL: http://llvm.org/viewvc/llvm-project?rev=270227&view=rev
Log:
[X86][AVX] Added _mm256_testc_si256/_mm256_testnzc_si256/_mm256_testz_si256 
tests

Modified:
cfe/trunk/test/CodeGen/avx-builtins.c

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=270227&r1=270226&r2=270227&view=diff
==
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Fri May 20 10:49:17 2016
@@ -1253,6 +1253,12 @@ int test_mm256_testc_ps(__m256 A, __m256
   return _mm256_testc_ps(A, B);
 }
 
+int test_mm256_testc_si256(__m256 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_testc_si256
+  // CHECK: call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %{{.*}}, <4 x i64> 
%{{.*}})
+  return _mm256_testc_si256(A, B);
+}
+
 int test_mm_testnzc_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_testnzc_pd
   // CHECK: call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}})
@@ -1277,6 +1283,12 @@ int test_mm256_testnzc_ps(__m256 A, __m2
   return _mm256_testnzc_ps(A, B);
 }
 
+int test_mm256_testnzc_si256(__m256 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_testnzc_si256
+  // CHECK: call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %{{.*}}, <4 x i64> 
%{{.*}})
+  return _mm256_testnzc_si256(A, B);
+}
+
 int test_mm_testz_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_testz_pd
   // CHECK: call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}})
@@ -1301,6 +1313,12 @@ int test_mm256_testz_ps(__m256 A, __m256
   return _mm256_testz_ps(A, B);
 }
 
+int test_mm256_testz_si256(__m256 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_testz_si256
+  // CHECK: call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> 
%{{.*}})
+  return _mm256_testz_si256(A, B);
+}
+
 __m256 test_mm256_undefined_ps() {
   // CHECK-LABEL: @test_mm256_undefined_ps
   // CHECK: ret <8 x float> undef


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D20468: [X86][AVX] Ensure zero-extension of _mm256_extract_epi8 and _mm256_extract_epi16

2016-05-20 Thread Simon Pilgrim via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: mkuper, craig.topper, kromanova, spatel.
RKSimon added a subscriber: cfe-commits.
RKSimon set the repository for this revision to rL LLVM.

Ensure _mm256_extract_epi8 and _mm256_extract_epi16 zero extend their i8/i16 
result to i32. This matches _mm_extract_epi8 and _mm_extract_epi16.

Fix for PR27594 

Katya - I've updated the doxygen comments for _mm256_extract_epi8 and 
_mm256_extract_epi16, I guess this will need to be updated in Sony's intrinsics 
document for the next regeneration?

Repository:
  rL LLVM

http://reviews.llvm.org/D20468

Files:
  lib/Headers/avxintrin.h
  test/CodeGen/avx-builtins.c

Index: test/CodeGen/avx-builtins.c
===
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -314,21 +314,19 @@
   return _mm256_dp_ps(A, B, 7);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi8(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi8
   // CHECK: and i32 %{{.*}}, 31
   // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i8 %{{.*}} to i32
+  // CHECK: zext i8 %{{.*}} to i32
   return _mm256_extract_epi8(A, 32);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi16(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi16
   // CHECK: and i32 %{{.*}}, 15
   // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i16 %{{.*}} to i32
+  // CHECK: zext i16 %{{.*}} to i32
   return _mm256_extract_epi16(A, 16);
 }
 
Index: lib/Headers/avxintrin.h
===
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -1875,13 +1875,13 @@
 /// \param __imm
 ///An immediate integer operand with bits [3:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 16 bits of extended
+/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
 ///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi16(__m256i __a, const int __imm)
 {
   __v16hi __b = (__v16hi)__a;
-  return __b[__imm & 15];
+  return (unsigned short)__b[__imm & 15];
 }
 
 /// \brief Takes a [32 x i8] vector and returns the vector element value
@@ -1897,13 +1897,13 @@
 /// \param __imm
 ///An immediate integer operand with bits [4:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 8 bits of extended 
packed
-///data.
+/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
+///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi8(__m256i __a, const int __imm)
 {
   __v32qi __b = (__v32qi)__a;
-  return __b[__imm & 31];
+  return (unsigned char)__b[__imm & 31];
 }
 
 #ifdef __x86_64__


Index: test/CodeGen/avx-builtins.c
===
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -314,21 +314,19 @@
   return _mm256_dp_ps(A, B, 7);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi8(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi8
   // CHECK: and i32 %{{.*}}, 31
   // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i8 %{{.*}} to i32
+  // CHECK: zext i8 %{{.*}} to i32
   return _mm256_extract_epi8(A, 32);
 }
 
-// FIXME: ZEXT instead of SEXT
 int test_mm256_extract_epi16(__m256i A) {
   // CHECK-LABEL: test_mm256_extract_epi16
   // CHECK: and i32 %{{.*}}, 15
   // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}}
-  // CHECK: ext i16 %{{.*}} to i32
+  // CHECK: zext i16 %{{.*}} to i32
   return _mm256_extract_epi16(A, 16);
 }
 
Index: lib/Headers/avxintrin.h
===
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -1875,13 +1875,13 @@
 /// \param __imm
 ///An immediate integer operand with bits [3:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 16 bits of extended
+/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
 ///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi16(__m256i __a, const int __imm)
 {
   __v16hi __b = (__v16hi)__a;
-  return __b[__imm & 15];
+  return (unsigned short)__b[__imm & 15];
 }
 
 /// \brief Takes a [32 x i8] vector and returns the vector element value
@@ -1897,13 +1897,13 @@
 /// \param __imm
 ///An immediate integer operand with bits [4:0] determining which vector
 ///element is extracted and returned.
-/// \returns A 32-bit integer containing the extracted 8 bits of extended packed
-///data.
+/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
+///packed data.
 static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi8(__m256i __a, const int 

r270212 - [X86][AVX] Added _mm256_extract_epi64 test

2016-05-20 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri May 20 07:57:21 2016
New Revision: 270212

URL: http://llvm.org/viewvc/llvm-project?rev=270212&view=rev
Log:
[X86][AVX] Added _mm256_extract_epi64 test

Modified:
cfe/trunk/test/CodeGen/avx-builtins.c

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=270212&r1=270211&r2=270212&view=diff
==
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Fri May 20 07:57:21 2016
@@ -339,6 +339,13 @@ int test_mm256_extract_epi32(__m256i A)
   return _mm256_extract_epi32(A, 8);
 }
 
+long long test_mm256_extract_epi64(__m256i A) {
+  // CHECK-LABEL: test_mm256_extract_epi64
+  // CHECK: and i32 %{{.*}}, 3
+  // CHECK: extractelement <4 x i64> %{{.*}}, i32 %{{.*}}
+  return _mm256_extract_epi64(A, 5);
+}
+
 __m128d test_mm256_extractf128_pd(__m256d A) {
   // CHECK-LABEL: test_mm256_extractf128_pd
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x 
i32> 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r270210 - [X86][AVX] Full set of AVX intrinsics tests

2016-05-20 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Fri May 20 07:41:02 2016
New Revision: 270210

URL: http://llvm.org/viewvc/llvm-project?rev=270210&view=rev
Log:
[X86][AVX] Full set of AVX intrinsics tests

llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll will be synced to this

Modified:
cfe/trunk/test/CodeGen/avx-builtins.c

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=270210&r1=270209&r2=270210&view=diff
==
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Fri May 20 07:41:02 2016
@@ -1,84 +1,1297 @@
-// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx 
-emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx 
-emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx 
-fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
 
-#include 
+#include 
 
-//
-// Test LLVM IR codegen of shuffle instructions
-//
+// NOTE: This should match the tests in 
llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
 
-__m256 test__mm256_loadu_ps(void* p) {
-  // CHECK: load <8 x float>, <8 x float>* %{{.*}}, align 1
-  return _mm256_loadu_ps(p);
+__m256d test_mm256_add_pd(__m256d A, __m256d B) {
+  // CHECK-LABEL: test_mm256_add_pd
+  // CHECK: fadd <4 x double>
+  return _mm256_add_pd(A, B);
 }
 
-__m256d test__mm256_loadu_pd(void* p) {
-  // CHECK: load <4 x double>, <4 x double>* %{{.*}}, align 1
-  return _mm256_loadu_pd(p);
+__m256 test_mm256_add_ps(__m256 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_add_ps
+  // CHECK: fadd <8 x float>
+  return _mm256_add_ps(A, B);
 }
 
-__m256i test__mm256_loadu_si256(void* p) {
-  // CHECK: load <4 x i64>, <4 x i64>* %{{.+}}, align 1
-  return _mm256_loadu_si256(p);
+__m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
+  // CHECK-LABEL: test_mm256_addsub_pd
+  // CHECK: call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> 
%{{.*}}, <4 x double> %{{.*}})
+  return _mm256_addsub_pd(A, B);
 }
 
-int test_extract_epi32(__m256i __a) {
-  // CHECK-LABEL: @test_extract_epi32
-  // CHECK: [[SHIFT1:%[^ ]+]] = and i32 %{{.*}}, 7
-  // CHECK: extractelement <8 x i32> %{{.*}}, i32 [[SHIFT1]]
-  return _mm256_extract_epi32(__a, 8);
+__m256 test_mm256_addsub_ps(__m256 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_addsub_ps
+  // CHECK: call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, 
<8 x float> %{{.*}})
+  return _mm256_addsub_ps(A, B);
 }
 
-int test_extract_epi16(__m256i __a) {
-  // CHECK-LABEL: @test_extract_epi16
-  // CHECK: [[SHIFT2:%[^ ]+]] = and i32 %{{.*}}, 15
-  // CHECK: extractelement <16 x i16> %{{.*}}, i32 [[SHIFT2]]
-  return _mm256_extract_epi16(__a, 16);
+__m256d test_mm256_and_pd(__m256d A, __m256d B) {
+  // CHECK-LABEL: test_mm256_and_pd
+  // CHECK: and <4 x i64>
+  return _mm256_and_pd(A, B);
 }
 
-int test_extract_epi8(__m256i __a) {
-  // CHECK-LABEL: @test_extract_epi8
-  // CHECK: [[SHIFT3:%[^ ]+]] = and i32 %{{.*}}, 31
-  // CHECK: extractelement <32 x i8> %{{.*}}, i32 [[SHIFT3]]
-  return _mm256_extract_epi8(__a, 32);
+__m256 test_mm256_and_ps(__m256 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_and_ps
+  // CHECK: and <8 x i32>
+  return _mm256_and_ps(A, B);
 }
 
-__m256d test_256_blend_pd(__m256d __a, __m256d __b) {
-  // CHECK-LABEL: @test_256_blend_pd
+__m256d test_mm256_andnot_pd(__m256d A, __m256d B) {
+  // CHECK-LABEL: test_mm256_andnot_pd
+  // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: and <4 x i64>
+  return _mm256_andnot_pd(A, B);
+}
+
+__m256 test_mm256_andnot_ps(__m256 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_andnot_ps
+  // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: and <8 x i32>
+  return _mm256_andnot_ps(A, B);
+}
+
+__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
+  // CHECK-LABEL: test_mm256_blend_pd
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x 
i32> 
-  return _mm256_blend_pd(__a, __b, 0x35);
+  return _mm256_blend_pd(A, B, 0x35);
 }
 
-__m256 test_256_blend_ps(__m256 __a, __m256 __b) {
-  // CHECK-LABEL: @test_256_blend_ps
+__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_blend_ps
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 

-  return _mm256_blend_ps(__a, __b, 0x35);
+  return _mm256_blend_ps(A, B, 0x35);
+}
+
+__m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) {
+  // CHECK-LABEL: test_mm256_blendv_pd
+  // CHECK: call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> 
%{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
+  return _mm256_blendv_pd(V1, V2, V3);
+}
+
+__m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) {
+  // CHECK-LABEL: test_mm256_blendv_ps
+  // CHECK: call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> 

r270079 - [X86][SSE2] Fixed shuffle of results in _mm_cmpnge_sd/_mm_cmpngt_sd tests

2016-05-19 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu May 19 11:48:59 2016
New Revision: 270079

URL: http://llvm.org/viewvc/llvm-project?rev=270079&view=rev
Log:
[X86][SSE2] Fixed shuffle of results in _mm_cmpnge_sd/_mm_cmpngt_sd tests

Modified:
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=270079&r1=270078&r2=270079&view=diff
==
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Thu May 19 11:48:59 2016
@@ -306,6 +306,10 @@ __m128d test_mm_cmpnge_pd(__m128d A, __m
 __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpnge_sd
   // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}}, i8 6)
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
   return _mm_cmpnge_sd(A, B);
 }
 
@@ -318,6 +322,10 @@ __m128d test_mm_cmpngt_pd(__m128d A, __m
 __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpngt_sd
   // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}}, i8 5)
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
   return _mm_cmpngt_sd(A, B);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r270043 - [X86][SSE2] Added _mm_move_* tests

2016-05-19 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu May 19 06:18:49 2016
New Revision: 270043

URL: http://llvm.org/viewvc/llvm-project?rev=270043&view=rev
Log:
[X86][SSE2] Added _mm_move_* tests

Modified:
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=270043&r1=270042&r2=270043&view=diff
==
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Thu May 19 06:18:49 2016
@@ -721,6 +721,21 @@ __m128d test_mm_min_sd(__m128d A, __m128
   return _mm_min_sd(A, B);
 }
 
+__m128i test_mm_move_epi64(__m128i A) {
+  // CHECK-LABEL: test_mm_move_epi64
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> 
+  return _mm_move_epi64(A);
+}
+
+__m128d test_mm_move_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_move_sd
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
+  return _mm_move_sd(A, B);
+}
+
 int test_mm_movemask_epi8(__m128i A) {
   // CHECK-LABEL: test_mm_movemask_epi8
   // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r270042 - [X86][SSE2] Added _mm_cast* and _mm_set* tests

2016-05-19 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu May 19 06:03:48 2016
New Revision: 270042

URL: http://llvm.org/viewvc/llvm-project?rev=270042=rev
Log:
[X86][SSE2] Added _mm_cast* and _mm_set* tests

Modified:
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=270042&r1=270041&r2=270042&view=diff
==
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Thu May 19 06:03:48 2016
@@ -121,6 +121,42 @@ __m128i test_mm_bsrli_si128(__m128i A) {
   return _mm_bsrli_si128(A, 5);
 }
 
+__m128 test_mm_castpd_ps(__m128d A) {
+  // CHECK-LABEL: test_mm_castpd_ps
+  // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
+  return _mm_castpd_ps(A);
+}
+
+__m128i test_mm_castpd_si128(__m128d A) {
+  // CHECK-LABEL: test_mm_castpd_si128
+  // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
+  return _mm_castpd_si128(A);
+}
+
+__m128d test_mm_castps_pd(__m128 A) {
+  // CHECK-LABEL: test_mm_castps_pd
+  // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
+  return _mm_castps_pd(A);
+}
+
+__m128i test_mm_castps_si128(__m128 A) {
+  // CHECK-LABEL: test_mm_castps_si128
+  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
+  return _mm_castps_si128(A);
+}
+
+__m128d test_mm_castsi128_pd(__m128i A) {
+  // CHECK-LABEL: test_mm_castsi128_pd
+  // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
+  return _mm_castsi128_pd(A);
+}
+
+__m128 test_mm_castsi128_ps(__m128i A) {
+  // CHECK-LABEL: test_mm_castsi128_ps
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  return _mm_castsi128_ps(A);
+}
+
 void test_mm_clflush(void* A) {
   // CHECK-LABEL: test_mm_clflush
   // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
@@ -778,6 +814,206 @@ __m128i test_mm_sad_epu8(__m128i A, __m1
   return _mm_sad_epu8(A, B);
 }
 
+__m128i test_mm_set_epi8(char A, char B, char C, char D,
+ char E, char F, char G, char H,
+ char I, char J, char K, char L,
+ char M, char N, char O, char P) {
+  // CHECK-LABEL: test_mm_set_epi8
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
+}
+
+__m128i test_mm_set_epi16(short A, short B, short C, short D,
+  short E, short F, short G, short H) {
+  // CHECK-LABEL: test_mm_set_epi16
+  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
+  return _mm_set_epi16(A, B, C, D, E, F, G, H);
+}
+
+__m128i test_mm_set_epi32(int A, int B, int C, int D) {
+  // CHECK-LABEL: test_mm_set_epi32
+  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
+  return _mm_set_epi32(A, B, C, D);
+}
+
+__m128i test_mm_set_epi64(__m64 A, __m64 B) {
+  // CHECK-LABEL: test_mm_set_epi64
+  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
+  return _mm_set_epi64(A, B);
+}
+
+__m128i test_mm_set_epi64x(long long A, long long B) {
+  // CHECK-LABEL: test_mm_set_epi64x
+  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
+  return _mm_set_epi64x(A, B);
+}
+
+__m128d test_mm_set_pd(double A, double B) 

r270034 - [X86][SSE2] Sync with llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

2016-05-19 Thread Simon Pilgrim via cfe-commits
Author: rksimon
Date: Thu May 19 04:52:59 2016
New Revision: 270034

URL: http://llvm.org/viewvc/llvm-project?rev=270034&view=rev
Log:
[X86][SSE2] Sync with llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

Modified:
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=270034&r1=270033&r2=270034&view=diff
==
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Thu May 19 04:52:59 2016
@@ -6,6 +6,8 @@
 
 #include 
 
+// NOTE: This should match the tests in 
llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+
 __m128i test_mm_add_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_add_epi8
   // CHECK: add <16 x i8>
@@ -38,31 +40,34 @@ __m128d test_mm_add_pd(__m128d A, __m128
 
 __m128d test_mm_add_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_add_sd
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
   // CHECK: fadd double
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_add_sd(A, B);
 }
 
 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi8
-  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b
+  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> 
%{{.*}})
   return _mm_adds_epi8(A, B);
 }
 
 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi16
-  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w
+  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> 
%{{.*}})
   return _mm_adds_epi16(A, B);
 }
 
 __m128i test_mm_adds_epu8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epu8
-  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b
+  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x 
i8> %{{.*}})
   return _mm_adds_epu8(A, B);
 }
 
 __m128i test_mm_adds_epu16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epu16
-  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w
+  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x 
i16> %{{.*}})
   return _mm_adds_epu16(A, B);
 }
 
@@ -78,15 +83,29 @@ __m128i test_mm_and_si128(__m128i A, __m
   return _mm_and_si128(A, B);
 }
 
+__m128d test_mm_andnot_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_andnot_pd
+  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: and <4 x i32>
+  return _mm_andnot_pd(A, B);
+}
+
+__m128i test_mm_andnot_si128(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_andnot_si128
+  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  // CHECK: and <2 x i64>
+  return _mm_andnot_si128(A, B);
+}
+
 __m128i test_mm_avg_epu8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_avg_epu8
-  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b
+  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> 
%{{.*}})
   return _mm_avg_epu8(A, B);
 }
 
 __m128i test_mm_avg_epu16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_avg_epu16
-  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w
+  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> 
%{{.*}})
   return _mm_avg_epu16(A, B);
 }
 
@@ -147,6 +166,10 @@ __m128d test_mm_cmpge_pd(__m128d A, __m1
 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpge_sd
   // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}}, i8 2)
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
   return _mm_cmpge_sd(A, B);
 }
 
@@ -177,6 +200,10 @@ __m128d test_mm_cmpgt_pd(__m128d A, __m1
 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpgt_sd
   // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}}, i8 1)
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
   return _mm_cmpgt_sd(A, B);
 }
 
@@ -308,73 +335,73 @@ __m128d test_mm_cmpunord_sd(__m128d A, _
 
 int test_mm_comieq_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_comieq_sd
-  // CHECK: call i32 @llvm.x86.sse2.comieq.sd
+  // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}})
   return _mm_comieq_sd(A, B);
 }
 
 int test_mm_comige_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_comige_sd
-  // CHECK: call i32 @llvm.x86.sse2.comige.sd
+  // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x 
double> %{{.*}})
   return _mm_comige_sd(A, B);
 }
 
 int 

<    4   5   6   7   8   9   10   >