https://github.com/guy-david updated https://github.com/llvm/llvm-project/pull/181110
>From 738e835037ef59e2693cbf046f45eab459d44fce Mon Sep 17 00:00:00 2001 From: Guy David <[email protected]> Date: Wed, 11 Feb 2026 16:20:41 +0200 Subject: [PATCH] [ValueTracking] Extend computeConstantRange for add/sub, sext/zext/trunc Recursively compute operand ranges for add/sub and propagate ranges through sext/zext/trunc. For add/sub, the computed range is intersected with any existing range from setLimitsForBinOp, and NSW/NUW flags are used via addWithNoWrap/ subWithNoWrap to tighten bounds. The motivation is to enable further folding of reduce.add expressions in comparisons, where the result range can be bounded by the input element ranges. --- clang/test/CodeGen/isfpclass.c | 4 +- clang/test/Headers/__clang_hip_math.hip | 48 +++++----- clang/test/Headers/wasm.c | 32 +++---- llvm/lib/Analysis/ValueTracking.cpp | 67 +++++++++++-- llvm/test/Analysis/BasicAA/range.ll | 66 +++++++++++++ llvm/test/CodeGen/AMDGPU/sdiv64.ll | 27 ++---- llvm/test/CodeGen/AMDGPU/srem64.ll | 95 +++++++++---------- llvm/test/CodeGen/AMDGPU/udiv64.ll | 34 +++---- llvm/test/CodeGen/AMDGPU/urem64.ll | 50 +++++----- llvm/test/CodeGen/PowerPC/add_cmp.ll | 12 +-- llvm/test/Transforms/InstCombine/add.ll | 4 +- llvm/test/Transforms/InstCombine/fls.ll | 2 +- llvm/test/Transforms/InstCombine/icmp-add.ll | 3 +- llvm/test/Transforms/InstCombine/sadd_sat.ll | 10 +- .../InstCombine/saturating-add-sub.ll | 5 +- llvm/unittests/Analysis/ValueTrackingTest.cpp | 91 ++++++++++++++++++ 16 files changed, 363 insertions(+), 187 deletions(-) diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c index 4c6d556e008e5..1465b43149fcc 100644 --- a/clang/test/CodeGen/isfpclass.c +++ b/clang/test/CodeGen/isfpclass.c @@ -136,7 +136,7 @@ typedef double __attribute__((ext_vector_type(4))) double4; typedef int __attribute__((ext_vector_type(4))) int4; typedef long __attribute__((ext_vector_type(4))) long4; -// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32( +// CHECK-LABEL: define dso_local noundef range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32( // CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer @@ -147,7 +147,7 @@ int4 check_isfpclass_nan_v4f32(float4 x) { return __builtin_isfpclass(x, 3 /*NaN*/); } -// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32( +// CHECK-LABEL: define dso_local noundef range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32( // CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]] diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 68a8666e41856..0a9c757aabf55 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -2653,7 +2653,7 @@ extern "C" __device__ int test_ilogb(double x) { return ilogb(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2666,7 +2666,7 @@ extern "C" __device__ int test_ilogb(double x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 1 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2674,7 +2674,7 @@ extern "C" __device__ int test_ilogb(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2682,7 +2682,7 @@ extern "C" __device__ int test_ilogb(double x) { // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finitef( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___finitef( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]]) @@ -2694,7 +2694,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { return __finitef(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2707,7 +2707,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 1 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2715,7 +2715,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2723,7 +2723,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finite( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___finite( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]]) @@ -2735,7 +2735,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { return __finite(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2748,7 +2748,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2756,7 +2756,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2764,7 +2764,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinff( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isinff( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]]) @@ -2776,7 +2776,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { return __isinff(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2789,7 +2789,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2797,7 +2797,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2805,7 +2805,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinf( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isinf( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]]) @@ -2817,7 +2817,7 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) { return __isinf(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 @@ -2829,21 +2829,21 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnanf( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isnanf( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 @@ -2854,7 +2854,7 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) { return __isnanf(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 @@ -2866,21 +2866,21 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnan( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isnan( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c index 2545a014e4340..fdce091fe640e 100644 --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -1234,7 +1234,7 @@ v128_t test_u16x8_ge(v128_t a, v128_t b) { return wasm_u16x8_ge(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_eq( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_eq( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A]], [[B]] @@ -1245,7 +1245,7 @@ v128_t test_i32x4_eq(v128_t a, v128_t b) { return wasm_i32x4_eq(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ne( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_ne( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A]], [[B]] @@ -1256,7 +1256,7 @@ v128_t test_i32x4_ne(v128_t a, v128_t b) { return wasm_i32x4_ne(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_lt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_lt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A]], [[B]] @@ -1267,7 +1267,7 @@ v128_t test_i32x4_lt(v128_t a, v128_t b) { return wasm_i32x4_lt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_lt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_lt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A]], [[B]] @@ -1278,7 +1278,7 @@ v128_t test_u32x4_lt(v128_t a, v128_t b) { return wasm_u32x4_lt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_gt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_gt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A]], [[B]] @@ -1289,7 +1289,7 @@ v128_t test_i32x4_gt(v128_t a, v128_t b) { return wasm_i32x4_gt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_gt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_gt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A]], [[B]] @@ -1300,7 +1300,7 @@ v128_t test_u32x4_gt(v128_t a, v128_t b) { return wasm_u32x4_gt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_le( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_le( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A]], [[B]] @@ -1311,7 +1311,7 @@ v128_t test_i32x4_le(v128_t a, v128_t b) { return wasm_i32x4_le(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_le( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_le( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A]], [[B]] @@ -1322,7 +1322,7 @@ v128_t test_u32x4_le(v128_t a, v128_t b) { return wasm_u32x4_le(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ge( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_ge( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A]], [[B]] @@ -1333,7 +1333,7 @@ v128_t test_i32x4_ge(v128_t a, v128_t b) { return wasm_i32x4_ge(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_ge( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_ge( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A]], [[B]] @@ -1428,7 +1428,7 @@ v128_t test_i64x2_ge(v128_t a, v128_t b) { return wasm_i64x2_ge(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_eq( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_eq( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1441,7 +1441,7 @@ v128_t test_f32x4_eq(v128_t a, v128_t b) { return wasm_f32x4_eq(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ne( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_ne( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1454,7 +1454,7 @@ v128_t test_f32x4_ne(v128_t a, v128_t b) { return wasm_f32x4_ne(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_lt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_lt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1467,7 +1467,7 @@ v128_t test_f32x4_lt(v128_t a, v128_t b) { return wasm_f32x4_lt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_gt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_gt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1480,7 +1480,7 @@ v128_t test_f32x4_gt(v128_t a, v128_t b) { return wasm_f32x4_gt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_le( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_le( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1493,7 +1493,7 @@ v128_t test_f32x4_le(v128_t a, v128_t b) { return wasm_f32x4_le(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ge( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_ge( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 4d8f3153b726f..223917a9fb7aa 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -9800,10 +9800,15 @@ std::optional<bool> llvm::isImpliedByDomCondition(CmpPredicate Pred, return std::nullopt; } -static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, - APInt &Upper, const InstrInfoQuery &IIQ, - bool PreferSignedRange) { - unsigned Width = Lower.getBitWidth(); +static ConstantRange getRangeForBinOp(const BinaryOperator &BO, + bool PreferSignedRange, bool UseInstrInfo, + AssumptionCache *AC, + const Instruction *CtxI, + const DominatorTree *DT, unsigned Depth) { + unsigned Width = BO.getType()->getScalarSizeInBits(); + InstrInfoQuery IIQ(UseInstrInfo); + APInt Lower = APInt(Width, 0); + APInt Upper = APInt(Width, 0); const APInt *C; switch (BO.getOpcode()) { case Instruction::Sub: @@ -10023,6 +10028,36 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, default: break; } + + ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper); + unsigned Opc = BO.getOpcode(); + bool IsDisjointOr = + Opc == Instruction::Or && cast<PossiblyDisjointInst>(&BO)->isDisjoint(); + if (Opc == Instruction::Add || Opc == Instruction::Sub || IsDisjointOr) { + // Limit recursion depth more aggressively for binary operations. + unsigned NewDepth = std::max(Depth + 1, MaxAnalysisRecursionDepth - 1); + ConstantRange LHS = + computeConstantRange(BO.getOperand(0), PreferSignedRange, UseInstrInfo, + AC, CtxI, DT, NewDepth); + ConstantRange RHS = + computeConstantRange(BO.getOperand(1), PreferSignedRange, UseInstrInfo, + AC, CtxI, DT, NewDepth); + unsigned NoWrapKind = 0; + // Only Add and Sub have no-wrap flags, not disjoint Or. + if (!IsDisjointOr) { + if (IIQ.hasNoUnsignedWrap(&BO)) + NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap; + if (IIQ.hasNoSignedWrap(&BO)) + NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap; + } + // Disjoint OR is semantically equivalent to Add. + ConstantRange OpCR = Opc == Instruction::Sub + ? LHS.subWithNoWrap(RHS, NoWrapKind) + : LHS.addWithNoWrap(RHS, NoWrapKind); + CR = CR.intersectWith(OpCR, PreferSignedRange ? ConstantRange::Signed + : ConstantRange::Unsigned); + } + return CR; } static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II, @@ -10219,11 +10254,25 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, InstrInfoQuery IIQ(UseInstrInfo); ConstantRange CR = ConstantRange::getFull(BitWidth); if (auto *BO = dyn_cast<BinaryOperator>(V)) { - APInt Lower = APInt(BitWidth, 0); - APInt Upper = APInt(BitWidth, 0); - // TODO: Return ConstantRange. - setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned); - CR = ConstantRange::getNonEmpty(Lower, Upper); + CR = getRangeForBinOp(*BO, ForSigned, UseInstrInfo, AC, CtxI, DT, Depth); + } else if (isa<SExtInst>(V) || isa<ZExtInst>(V) || isa<TruncInst>(V)) { + auto *CastOp = cast<CastInst>(V); + ConstantRange OpCR = + computeConstantRange(CastOp->getOperand(0), ForSigned, UseInstrInfo, AC, + CtxI, DT, Depth + 1); + switch (CastOp->getOpcode()) { + case Instruction::SExt: + CR = OpCR.signExtend(BitWidth); + break; + case Instruction::ZExt: + CR = OpCR.zeroExtend(BitWidth); + break; + case Instruction::Trunc: + CR = OpCR.truncate(BitWidth); + break; + default: + llvm_unreachable("Unexpected cast opcode"); + } } else if (auto *II = dyn_cast<IntrinsicInst>(V)) CR = getRangeForIntrinsic(*II, UseInstrInfo); else if (auto *SI = dyn_cast<SelectInst>(V)) { diff --git a/llvm/test/Analysis/BasicAA/range.ll b/llvm/test/Analysis/BasicAA/range.ll index e5dfb60c8b878..a41fd63ee52f6 100644 --- a/llvm/test/Analysis/BasicAA/range.ll +++ b/llvm/test/Analysis/BasicAA/range.ll @@ -271,6 +271,72 @@ entry: ret i32 %load_ } +; CHECK-LABEL: Function: zext_propagate_range +; CHECK: NoAlias: i32* %gep, i32* %gep128 +define void @zext_propagate_range(ptr %p, i8 %idx) { + %narrow = and i8 %idx, 127 + %wide = zext i8 %narrow to i64 + %gep = getelementptr i32, ptr %p, i64 %wide + %gep128 = getelementptr i32, ptr %p, i64 128 + load i32, ptr %gep + load i32, ptr %gep128 + ret void +} + +; CHECK-LABEL: Function: sext_propagate_range +; CHECK: NoAlias: i32* %gep, i32* %gep128 +define void @sext_propagate_range(ptr %p, i8 %idx) { + %clamped = and i8 %idx, 100 + %wide = sext i8 %clamped to i64 + %gep = getelementptr i32, ptr %p, i64 %wide + %gep128 = getelementptr i32, ptr %p, i64 128 + load i32, ptr %gep + load i32, ptr %gep128 + ret void +} + +; CHECK-LABEL: Function: zext_add_range +; CHECK: NoAlias: i32* %gep, i32* %gep512 +define void @zext_add_range(ptr %p, i8 %x, i8 %y) { + %ext.x = zext i8 %x to i64 + %ext.y = zext i8 %y to i64 + %sum = add i64 %ext.x, %ext.y + %gep = getelementptr i32, ptr %p, i64 %sum + %gep512 = getelementptr i32, ptr %p, i64 512 + load i32, ptr %gep + load i32, ptr %gep512 + ret void +} + +; CHECK-LABEL: Function: zext_sub_range +; CHECK: NoAlias: i32* %gep, i32* %gep256 +; CHECK: NoAlias: i32* %gep, i32* %gepneg256 +define void @zext_sub_range(ptr %p, i8 %x, i8 %y) { + %ext.x = zext i8 %x to i64 + %ext.y = zext i8 %y to i64 + %diff = sub i64 %ext.x, %ext.y + %gep = getelementptr i32, ptr %p, i64 %diff + %gep256 = getelementptr i32, ptr %p, i64 256 + %gepneg256 = getelementptr i32, ptr %p, i64 -256 + load i32, ptr %gep + load i32, ptr %gep256 + load i32, ptr %gepneg256 + ret void +} + +; CHECK-LABEL: Function: trunc_propagate_range +; CHECK: NoAlias: i32* %gep, i32* %gep64 +define void @trunc_propagate_range(ptr %p, i64 %idx) { + %clamped = and i64 %idx, 63 + %narrow = trunc i64 %clamped to i8 + %wide = zext i8 %narrow to i64 + %gep = getelementptr i32, ptr %p, i64 %wide + %gep64 = getelementptr i32, ptr %p, i64 64 + load i32, ptr %gep + load i32, ptr %gep64 + ret void +} + declare void @llvm.assume(i1) !0 = !{ i32 0, i32 2 } diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index fdb20f372ab8d..d3a027f99947b 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -1275,12 +1275,11 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_addc_u32 s11, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[10:11], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[10:11], 63 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13] -; GCN-IR-NEXT: s_and_b64 s[8:9], s[12:13], exec +; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[8:9] +; GCN-IR-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1462,13 +1461,11 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, 24, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB11_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1653,14 +1650,12 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] -; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] +; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1755,12 +1750,10 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[0:1] ; GCN-IR-NEXT: v_mov_b32_e32 v9, v8 ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[0:1] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v5, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB13_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 02d2e6c1473ab..3bee2fa7da49a 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -1414,73 +1414,72 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-LABEL: s_test_srem_k_num_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 -; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_ashr_i32 s8, s3, 31 -; GCN-IR-NEXT: s_mov_b32 s9, s8 -; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9] -; GCN-IR-NEXT: s_sub_u32 s4, s2, s8 -; GCN-IR-NEXT: s_subb_u32 s5, s3, s8 +; GCN-IR-NEXT: s_ashr_i32 s6, s3, 31 +; GCN-IR-NEXT: s_mov_b32 s7, s6 +; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s4, s2, s6 +; GCN-IR-NEXT: s_subb_u32 s5, s3, s6 ; GCN-IR-NEXT: s_flbit_i32_b64 s14, s[4:5] -; GCN-IR-NEXT: s_add_u32 s2, s14, 0xffffffc5 -; GCN-IR-NEXT: s_addc_u32 s3, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[4:5], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[2:3], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 63 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-IR-NEXT: s_and_b64 s[8:9], s[10:11], exec -; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] -; GCN-IR-NEXT: s_mov_b32 s9, 0 +; GCN-IR-NEXT: s_add_u32 s8, s14, 0xffffffc5 +; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[4:5], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63 +; GCN-IR-NEXT: s_mov_b64 s[2:3], 0 +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7] +; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 +; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 +; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s8, s2, 1 -; GCN-IR-NEXT: s_addc_u32 s3, s3, 0 -; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0 -; GCN-IR-NEXT: s_sub_i32 s2, 63, s2 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] -; GCN-IR-NEXT: s_lshl_b64 s[2:3], 24, s2 +; GCN-IR-NEXT: s_add_u32 s10, s8, 1 +; GCN-IR-NEXT: s_addc_u32 s6, s9, 0 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 +; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s8 +; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s10 ; GCN-IR-NEXT: s_add_u32 s12, s4, -1 ; GCN-IR-NEXT: s_addc_u32 s13, s5, -1 ; GCN-IR-NEXT: s_sub_u32 s14, 58, s14 ; GCN-IR-NEXT: s_subb_u32 s15, 0, 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 -; GCN-IR-NEXT: s_mov_b32 s7, 0 +; GCN-IR-NEXT: s_mov_b32 s3, 0 ; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1 -; GCN-IR-NEXT: s_lshr_b32 s6, s3, 31 -; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[6:7] -; GCN-IR-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] -; GCN-IR-NEXT: s_sub_u32 s6, s12, s10 -; GCN-IR-NEXT: s_subb_u32 s6, s13, s11 -; GCN-IR-NEXT: s_ashr_i32 s8, s6, 31 +; GCN-IR-NEXT: s_lshr_b32 s2, s7, 31 +; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[2:3] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s2, s12, s10 +; GCN-IR-NEXT: s_subb_u32 s2, s13, s11 +; GCN-IR-NEXT: s_ashr_i32 s8, s2, 31 ; GCN-IR-NEXT: s_mov_b32 s9, s8 -; GCN-IR-NEXT: s_and_b32 s6, s8, 1 +; GCN-IR-NEXT: s_and_b32 s2, s8, 1 ; GCN-IR-NEXT: s_and_b64 s[16:17], s[8:9], s[4:5] ; GCN-IR-NEXT: s_sub_u32 s10, s10, s16 ; GCN-IR-NEXT: s_subb_u32 s11, s11, s17 ; GCN-IR-NEXT: s_add_u32 s14, s14, 1 ; GCN-IR-NEXT: s_addc_u32 s15, s15, 0 ; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 -; GCN-IR-NEXT: s_mov_b64 s[8:9], s[6:7] +; GCN-IR-NEXT: s_mov_b64 s[8:9], s[2:3] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3 ; GCN-IR-NEXT: .LBB10_4: ; %Flow6 -; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[2:3] +; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 +; GCN-IR-NEXT: s_or_b64 s[6:7], s[2:3], s[6:7] ; GCN-IR-NEXT: .LBB10_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s8 +; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 ; GCN-IR-NEXT: v_mul_hi_u32 v0, s4, v0 -; GCN-IR-NEXT: s_mul_i32 s6, s4, s9 -; GCN-IR-NEXT: s_mul_i32 s5, s5, s8 -; GCN-IR-NEXT: s_mul_i32 s4, s4, s8 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s6, v0 +; GCN-IR-NEXT: s_mul_i32 s7, s4, s7 +; GCN-IR-NEXT: s_mul_i32 s5, s5, s6 +; GCN-IR-NEXT: s_mul_i32 s4, s4, s6 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s7, v0 ; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, s5, v0 ; GCN-IR-NEXT: v_sub_i32_e64 v0, vcc, 24, s4 ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 @@ -1612,12 +1611,10 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, 24, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB11_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1801,13 +1798,11 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1908,12 +1903,10 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v1, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v0, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB13_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index 1c50f930facba..3f5be80b1efbd 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -912,12 +912,11 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[10:11], exec +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7] +; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1083,13 +1082,11 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v5, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[4:5] ; GCN-IR-NEXT: v_mov_b32_e32 v3, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1173,12 +1170,10 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB10_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v4 @@ -1277,13 +1272,12 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1372,12 +1366,10 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v4 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index 28e6627b87413..b6608b9f48a7a 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -926,12 +926,11 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[10:11], exec +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7] +; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1042,13 +1041,12 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1219,13 +1217,11 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB8_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1310,22 +1306,20 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_add_i32_e64 v2, s[4:5], 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 ; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 48, v8 -; GCN-IR-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 48, v8 +; GCN-IR-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, s[4:5] ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[2:3] +; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 -; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v1, 0, s[4:5] -; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v0, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v1, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 +; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[0:1], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 @@ -1369,11 +1363,11 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: .LBB9_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 -; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 +; GCN-IR-NEXT: v_or_b32_e32 v3, v5, v3 +; GCN-IR-NEXT: v_or_b32_e32 v2, v4, v2 ; GCN-IR-NEXT: .LBB9_6: ; %Flow5 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[4:5], 15 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 15 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc ; GCN-IR-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/PowerPC/add_cmp.ll b/llvm/test/CodeGen/PowerPC/add_cmp.ll index cbe16a498a538..c5cc071e0183d 100644 --- a/llvm/test/CodeGen/PowerPC/add_cmp.ll +++ b/llvm/test/CodeGen/PowerPC/add_cmp.ll @@ -30,27 +30,27 @@ entry: define zeroext i1 @addiCmpiUnsignedOverflow(i32 zeroext %x) { entry: - %add = add nuw i32 110, %x - %cmp = icmp ugt i32 %add, 100 + %add = add nuw i32 110, %x + %cmp = icmp ugt i32 %add, 200 ret i1 %cmp ; CHECK: === addiCmpiUnsignedOverflow ; CHECK: Optimized lowered selection DAG: %bb.0 'addiCmpiUnsignedOverflow:entry' ; CHECK: [[REG1:t[0-9]+]]: i32 = truncate {{t[0-9]+}} ; CHECK: [[REG2:t[0-9]+]]: i32 = add nuw [[REG1]], Constant:i32<110> -; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i32<100>, setugt:ch +; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i32<200>, setugt:ch } define zeroext i1 @addiCmpiSignedOverflow(i16 signext %x) { entry: - %add = add nsw i16 16, %x - %cmp = icmp sgt i16 %add, -32767 + %add = add nsw i16 16, %x + %cmp = icmp sgt i16 %add, 30 ret i1 %cmp ; CHECK: === addiCmpiSignedOverflow ; CHECK: Optimized lowered selection DAG: %bb.0 'addiCmpiSignedOverflow:entry' ; CHECK: [[REG1:t[0-9]+]]: i16 = truncate {{t[0-9]+}} ; CHECK: [[REG2:t[0-9]+]]: i16 = add nsw [[REG1]], Constant:i16<16> -; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i16<-32767>, setgt:ch +; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i16<30>, setgt:ch } diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index aa68dfb540064..9d19ff1d37c26 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -3274,9 +3274,7 @@ define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) { define <2 x i32> @dec_zext_add_nonzero_vec_poison2(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison2( ; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8) -; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i8> [[O]], splat (i8 -1) -; CHECK-NEXT: [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], <i32 1, i32 poison> +; CHECK-NEXT: [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[C]] ; %o = or <2 x i8> %x, <i8 8, i8 8> diff --git a/llvm/test/Transforms/InstCombine/fls.ll b/llvm/test/Transforms/InstCombine/fls.ll index 68bc0a2fc8a1d..ea757268259f5 100644 --- a/llvm/test/Transforms/InstCombine/fls.ll +++ b/llvm/test/Transforms/InstCombine/fls.ll @@ -33,7 +33,7 @@ define i32 @flsnotconst(i64 %z) { ; CHECK-LABEL: @flsnotconst( ; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[Z:%.*]], i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[CTLZ]] to i32 -; CHECK-NEXT: [[GOO:%.*]] = sub nsw i32 64, [[TMP1]] +; CHECK-NEXT: [[GOO:%.*]] = sub nuw nsw i32 64, [[TMP1]] ; CHECK-NEXT: ret i32 [[GOO]] ; %goo = call i32 @flsl(i64 %z) diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index 1b66a50c26e59..94499c91b8f87 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -3160,7 +3160,8 @@ define i1 @icmp_add_constant_with_constant_ult_to_slt_neg2(i8 range(i8 -4, 120) } ; Negative test: C2 is negative -define i1 @icmp_add_constant_with_constant_ult_to_slt_neg3(i32 range(i32 -4, 10) %x) { +; Prevent constant fold by using the range [-10, 10). +define i1 @icmp_add_constant_with_constant_ult_to_slt_neg3(i32 range(i32 -10, 10) %x) { ; CHECK-LABEL: @icmp_add_constant_with_constant_ult_to_slt_neg3( ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], -6 diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll index 6afb77d975b8c..3143d4addecc1 100644 --- a/llvm/test/Transforms/InstCombine/sadd_sat.ll +++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -824,11 +824,11 @@ entry: define i16 @or(i8 %X, i16 %Y) { ; CHECK-LABEL: @or( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[Y:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], -16 -; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[TMP2]]) -; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3]] to i16 -; CHECK-NEXT: ret i16 [[L12]] +; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3:%.*]] to i16 +; CHECK-NEXT: [[CONV14:%.*]] = or i16 [[Y:%.*]], -16 +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[L12]], [[CONV14]] +; CHECK-NEXT: [[L13:%.*]] = call i16 @llvm.smin.i16(i16 [[SUB]], i16 127) +; CHECK-NEXT: ret i16 [[L13]] ; %conv10 = sext i8 %X to i16 %conv14 = or i16 %Y, 65520 diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index efa89db4af61a..dff1f09213864 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1111,8 +1111,7 @@ define <3 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat1_poison(<3 x i8> %a) { ; Can be optimized if the add nuw RHS constant range handles non-splat vectors. define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat2(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat2( -; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], <i8 10, i8 9> -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> splat (i8 9)) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], <i8 1, i8 0> ; CHECK-NEXT: ret <2 x i8> [[R]] ; %b = add nuw <2 x i8> %a, <i8 10, i8 9> @@ -1188,7 +1187,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat2(<2 x i8> %a, <2 x i8> ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat2( ; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 8> ; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7) -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[AA]], <2 x i8> [[BB]]) +; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %aa = add nsw <2 x i8> %a, <i8 7, i8 8> diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index fa06b0caa6a64..ba70dded23d42 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -3540,6 +3540,97 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { // If we don't know the value of x.2, we don't know the value of x.1. EXPECT_TRUE(CR1.isFullSet()); } + { + auto M = parseModule(R"( + define void @test(i8 %x) { + %sext = sext i8 %x to i32 + %zext = zext i8 %x to i32 + ret void + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *SExt = &findInstructionByName(F, "sext"); + Instruction *ZExt = &findInstructionByName(F, "zext"); + ConstantRange SExtCR = computeConstantRange(SExt, true, true, &AC, SExt); + EXPECT_EQ(SExtCR.getSignedMin().getSExtValue(), -128); + EXPECT_EQ(SExtCR.getSignedMax().getSExtValue(), 127); + ConstantRange ZExtCR = computeConstantRange(ZExt, false, true, &AC, ZExt); + EXPECT_EQ(ZExtCR.getUnsignedMin().getZExtValue(), 0u); + EXPECT_EQ(ZExtCR.getUnsignedMax().getZExtValue(), 255u); + } + { + auto M = parseModule(R"( + define i32 @test(i8 %x) { + %ext = sext i8 %x to i32 + %add = add nsw i32 %ext, 10 + ret i32 %add + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Add = &findInstructionByName(F, "add"); + ConstantRange CR = computeConstantRange(Add, true, true, &AC, Add); + EXPECT_EQ(CR.getSignedMin().getSExtValue(), -118); + EXPECT_EQ(CR.getSignedMax().getSExtValue(), 137); + } + { + auto M = parseModule(R"( + define i32 @test(i8 %x, i8 %y) { + %ext.x = zext i8 %x to i32 + %ext.y = zext i8 %y to i32 + %sub = sub i32 %ext.x, %ext.y + ret i32 %sub + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Sub = &findInstructionByName(F, "sub"); + ConstantRange CR = computeConstantRange(Sub, true, true, &AC, Sub); + EXPECT_EQ(CR.getSignedMin().getSExtValue(), -255); + EXPECT_EQ(CR.getSignedMax().getSExtValue(), 255); + } + { + // trunc + auto M = parseModule(R"( + define void @test(i32 %x) { + %narrow = trunc i32 %x to i8 + ret void + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Trunc = &findInstructionByName(F, "narrow"); + ConstantRange CR = computeConstantRange(Trunc, false, true, &AC, Trunc); + EXPECT_TRUE(CR.isFullSet()); + EXPECT_EQ(CR.getBitWidth(), 8u); + } + { + // trunc with restricted input range + auto M = parseModule(R"( + define i8 @test(i32 %x) { + %clamped = and i32 %x, 127 + %narrow = trunc i32 %clamped to i8 + ret i8 %narrow + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Trunc = &findInstructionByName(F, "narrow"); + ConstantRange CR = computeConstantRange(Trunc, false, true, &AC, Trunc); + EXPECT_EQ(CR.getUnsignedMin().getZExtValue(), 0u); + EXPECT_EQ(CR.getUnsignedMax().getZExtValue(), 127u); + } + { + auto M = parseModule(R"( + define i32 @test(i8 %x, i8 %y) { + %ext.x = zext i8 %x to i32 + %ext.y = zext i8 %y to i32 + %or = or disjoint i32 %ext.x, %ext.y + ret i32 %or + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Or = &findInstructionByName(F, "or"); + ConstantRange CR = computeConstantRange(Or, false, true, &AC, Or); + EXPECT_EQ(CR.getUnsignedMin().getZExtValue(), 0u); + EXPECT_EQ(CR.getUnsignedMax().getZExtValue(), 510u); + } } struct FindAllocaForValueTestParams { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
