GBuella created this revision.
GBuella added reviewers: craig.topper, uriel.k, RKSimon, andrew.w.kaylor,
spatel, scanon, efriedma.
Herald added a subscriber: cfe-commits.
This patch removes an optimization used with the TRUE/FALSE
predicates, as was suggested in https://reviews.llvm.org/D45616
for r335339.
The optimization was buggy: r335339 also used it for the
*_mask builtins without actually applying the mask -- the
mask argument was simply ignored.
Repository:
rC Clang
https://reviews.llvm.org/D48715
Files:
lib/CodeGen/CGBuiltin.cpp
test/CodeGen/avx-builtins.c
test/CodeGen/avx512f-builtins.c
test/CodeGen/avx512vl-builtins.c
Index: test/CodeGen/avx512vl-builtins.c
===================================================================
--- test/CodeGen/avx512vl-builtins.c
+++ test/CodeGen/avx512vl-builtins.c
@@ -1077,34 +1077,6 @@
return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0);
}
-__mmask8 test_mm256_cmp_ps_mask_true_uq(__m256 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm256_cmp_ps_mask_true_uq
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_TRUE_UQ);
-}
-
-__mmask8 test_mm256_cmp_ps_mask_true_us(__m256 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm256_cmp_ps_mask_true_us
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_TRUE_US);
-}
-
-__mmask8 test_mm256_cmp_ps_mask_false_oq(__m256 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm256_cmp_ps_mask_false_oq
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_FALSE_OQ);
-}
-
-__mmask8 test_mm256_cmp_ps_mask_false_os(__m256 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm256_cmp_ps_mask_false_os
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_FALSE_OS);
-}
-
__mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_mask_cmp_ps_mask
// CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
@@ -1118,34 +1090,6 @@
return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0);
}
-__mmask8 test_mm_cmp_ps_mask_true_uq(__m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_cmp_ps_mask_true_uq
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_TRUE_UQ);
-}
-
-__mmask8 test_mm_cmp_ps_mask_true_us(__m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_cmp_ps_mask_true_us
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_TRUE_US);
-}
-
-__mmask8 test_mm_cmp_ps_mask_false_oq(__m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_cmp_ps_mask_false_oq
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_FALSE_OQ);
-}
-
-__mmask8 test_mm_cmp_ps_mask_false_os(__m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_cmp_ps_mask_false_os
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_FALSE_OS);
-}
-
__mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_cmp_ps_mask
// CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
@@ -1160,34 +1104,6 @@
return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0);
}
-__mmask8 test_mm256_cmp_pd_mask_true_uq(__m256d __A, __m256d __B) {
- // CHECK-LABEL: @test_mm256_cmp_pd_mask_true_uq
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_TRUE_UQ);
-}
-
-__mmask8 test_mm256_cmp_pd_mask_true_us(__m256d __A, __m256d __B) {
- // CHECK-LABEL: @test_mm256_cmp_pd_mask_true_us
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_TRUE_US);
-}
-
-__mmask8 test_mm256_cmp_pd_mask_false_oq(__m256d __A, __m256d __B) {
- // CHECK-LABEL: @test_mm256_cmp_pd_mask_false_oq
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_FALSE_OQ);
-}
-
-__mmask8 test_mm256_cmp_pd_mask_false_os(__m256d __A, __m256d __B) {
- // CHECK-LABEL: @test_mm256_cmp_pd_mask_false_os
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_FALSE_OS);
-}
-
__mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_mask_cmp_pd_mask
// CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
@@ -1202,34 +1118,6 @@
return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0);
}
-__mmask8 test_mm_cmp_pd_mask_true_uq(__m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_cmp_pd_mask_true_uq
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_TRUE_UQ);
-}
-
-__mmask8 test_mm_cmp_pd_mask_true_us(__m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_cmp_pd_mask_true_us
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_TRUE_US);
-}
-
-__mmask8 test_mm_cmp_pd_mask_false_oq(__m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_cmp_pd_mask_false_oq
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_FALSE_OQ);
-}
-
-__mmask8 test_mm_cmp_pd_mask_false_os(__m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_cmp_pd_mask_false_os
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_FALSE_OS);
-}
-
__mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_cmp_pd_mask
// CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
Index: test/CodeGen/avx512f-builtins.c
===================================================================
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -1296,34 +1296,6 @@
return _mm512_cmp_ps_mask(a, b, 0);
}
-__mmask16 test_mm512_cmp_ps_mask_true_uq(__m512 a, __m512 b) {
- // CHECK-LABEL: @test_mm512_cmp_ps_mask_true_uq
- // CHECK-NOT: call
- // CHECK: ret i16 -1
- return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_UQ);
-}
-
-__mmask16 test_mm512_cmp_ps_mask_true_us(__m512 a, __m512 b) {
- // CHECK-LABEL: @test_mm512_cmp_ps_mask_true_us
- // CHECK-NOT: call
- // CHECK: ret i16 -1
- return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_US);
-}
-
-__mmask16 test_mm512_cmp_ps_mask_false_oq(__m512 a, __m512 b) {
- // CHECK-LABEL: @test_mm512_cmp_ps_mask_false_oq
- // CHECK-NOT: call
- // CHECK: ret i16 0
- return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OQ);
-}
-
-__mmask16 test_mm512_cmp_ps_mask_false_os(__m512 a, __m512 b) {
- // CHECK-LABEL: @test_mm512_cmp_ps_mask_false_os
- // CHECK-NOT: call
- // CHECK: ret i16 0
- return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OS);
-}
-
__mmask16 test_mm512_mask_cmp_ps_mask(__mmask16 m, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmp_ps_mask
// CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}}
@@ -1350,34 +1322,6 @@
return _mm512_cmp_pd_mask(a, b, 0);
}
-__mmask8 test_mm512_cmp_pd_mask_true_uq(__m512d a, __m512d b) {
- // CHECK-LABEL: @test_mm512_cmp_pd_mask_true_uq
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_UQ);
-}
-
-__mmask8 test_mm512_cmp_pd_mask_true_us(__m512d a, __m512d b) {
- // CHECK-LABEL: @test_mm512_cmp_pd_mask_true_us
- // CHECK-NOT: call
- // CHECK: ret i8 -1
- return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_US);
-}
-
-__mmask8 test_mm512_cmp_pd_mask_false_oq(__m512d a, __m512d b) {
- // CHECK-LABEL: @test_mm512_cmp_pd_mask_false_oq
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OQ);
-}
-
-__mmask8 test_mm512_cmp_pd_mask_false_os(__m512d a, __m512d b) {
- // CHECK-LABEL: @test_mm512_cmp_pd_mask_false_os
- // CHECK-NOT: call
- // CHECK: ret i8 0
- return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OS);
-}
-
__mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmp_pd_mask
// CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
Index: test/CodeGen/avx-builtins.c
===================================================================
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -1419,99 +1419,3 @@
// CHECK: extractelement <8 x float> %{{.*}}, i32 0
return _mm256_cvtss_f32(__a);
}
-
-__m256 test_mm256_cmp_ps_true(__m256 a, __m256 b) {
- // CHECK-LABEL: @test_mm256_cmp_ps_true
- // CHECK: ret <8 x float> <float 0xFFFFFFFFE0000000,
- return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);
-}
-
-__m256d test_mm256_cmp_pd_true(__m256d a, __m256d b) {
- // CHECK-LABEL: @test_mm256_cmp_pd_true
- // CHECK: ret <4 x double> <double 0xFFFFFFFFFFFFFFFF,
- return _mm256_cmp_pd(a, b, _CMP_TRUE_UQ);
-}
-
-__m256 test_mm256_cmp_ps_false(__m256 a, __m256 b) {
- // CHECK-LABEL: @test_mm256_cmp_ps_false
- // CHECK: ret <8 x float> zeroinitializer
- return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
-}
-
-__m256d test_mm256_cmp_pd_false(__m256d a, __m256d b) {
- // CHECK-LABEL: @test_mm256_cmp_pd_false
- // CHECK: ret <4 x double> zeroinitializer
- return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ);
-}
-
-__m256 test_mm256_cmp_ps_strue(__m256 a, __m256 b) {
- // CHECK-LABEL: @test_mm256_cmp_ps_strue
- // CHECK: ret <8 x float> <float 0xFFFFFFFFE0000000,
- return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
-}
-
-__m256d test_mm256_cmp_pd_strue(__m256d a, __m256d b) {
- // CHECK-LABEL: @test_mm256_cmp_pd_strue
- // CHECK: ret <4 x double> <double 0xFFFFFFFFFFFFFFFF,
- return _mm256_cmp_pd(a, b, _CMP_TRUE_US);
-}
-
-__m256 test_mm256_cmp_ps_sfalse(__m256 a, __m256 b) {
- // CHECK-LABEL: @test_mm256_cmp_ps_sfalse
- // CHECK: ret <8 x float> zeroinitializer
- return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
-}
-
-__m256d test_mm256_cmp_pd_sfalse(__m256d a, __m256d b) {
- // CHECK-LABEL: @test_mm256_cmp_pd_sfalse
- // CHECK: ret <4 x double> zeroinitializer
- return _mm256_cmp_pd(a, b, _CMP_FALSE_OS);
-}
-
-__m128 test_mm_cmp_ps_true(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_cmp_ps_true
- // CHECK: ret <4 x float> <float 0xFFFFFFFFE0000000,
- return _mm_cmp_ps(a, b, _CMP_TRUE_UQ);
-}
-
-__m128 test_mm_cmp_pd_true(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_cmp_pd_true
- // CHECK: ret <4 x float> <float 0xFFFFFFFFE0000000,
- return _mm_cmp_pd(a, b, _CMP_TRUE_UQ);
-}
-
-__m128 test_mm_cmp_ps_false(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_cmp_ps_false
- // CHECK: ret <4 x float> zeroinitializer
- return _mm_cmp_ps(a, b, _CMP_FALSE_OQ);
-}
-
-__m128 test_mm_cmp_pd_false(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_cmp_pd_false
- // CHECK: ret <4 x float> zeroinitializer
- return _mm_cmp_pd(a, b, _CMP_FALSE_OQ);
-}
-
-__m128 test_mm_cmp_ps_strue(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_cmp_ps_strue
- // CHECK: ret <4 x float> <float 0xFFFFFFFFE0000000,
- return _mm_cmp_ps(a, b, _CMP_TRUE_US);
-}
-
-__m128 test_mm_cmp_pd_strue(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_cmp_pd_strue
- // CHECK: ret <4 x float> <float 0xFFFFFFFFE0000000,
- return _mm_cmp_pd(a, b, _CMP_TRUE_US);
-}
-
-__m128 test_mm_cmp_ps_sfalse(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_cmp_ps_sfalse
- // CHECK: ret <4 x float> zeroinitializer
- return _mm_cmp_ps(a, b, _CMP_FALSE_OS);
-}
-
-__m128 test_mm_cmp_pd_sfalse(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_cmp_pd_sfalse
- // CHECK: ret <4 x float> zeroinitializer
- return _mm_cmp_pd(a, b, _CMP_FALSE_OS);
-}
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -10160,43 +10160,38 @@
// e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
FCmpInst::Predicate Pred;
switch (CC) {
- case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
- case 0x01: Pred = FCmpInst::FCMP_OLT; break;
- case 0x02: Pred = FCmpInst::FCMP_OLE; break;
- case 0x03: Pred = FCmpInst::FCMP_UNO; break;
- case 0x04: Pred = FCmpInst::FCMP_UNE; break;
- case 0x05: Pred = FCmpInst::FCMP_UGE; break;
- case 0x06: Pred = FCmpInst::FCMP_UGT; break;
- case 0x07: Pred = FCmpInst::FCMP_ORD; break;
- case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
- case 0x09: Pred = FCmpInst::FCMP_ULT; break;
- case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
- case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
- case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
- case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
- case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
- case 0x11: Pred = FCmpInst::FCMP_OLT; break;
- case 0x12: Pred = FCmpInst::FCMP_OLE; break;
- case 0x13: Pred = FCmpInst::FCMP_UNO; break;
- case 0x14: Pred = FCmpInst::FCMP_UNE; break;
- case 0x15: Pred = FCmpInst::FCMP_UGE; break;
- case 0x16: Pred = FCmpInst::FCMP_UGT; break;
- case 0x17: Pred = FCmpInst::FCMP_ORD; break;
- case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
- case 0x19: Pred = FCmpInst::FCMP_ULT; break;
- case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
- case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
- case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
- case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- case 0x0b: // FALSE_OQ
- case 0x1b: // FALSE_OS
- return llvm::Constant::getNullValue(ConvertType(E->getType()));
- case 0x0f: // TRUE_UQ
- case 0x1f: // TRUE_US
- return llvm::Constant::getAllOnesValue(ConvertType(E->getType()));
-
+ case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x01: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x02: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x03: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x04: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x05: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x06: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x07: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x09: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
+ case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
+ case 0x0f: Pred = FCmpInst::FCMP_TRUE; break;
+ case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x11: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x12: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x13: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x14: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x15: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x16: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x17: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x19: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
+ case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
+ case 0x1f: Pred = FCmpInst::FCMP_TRUE; break;
default: llvm_unreachable("Unhandled CC");
}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits