https://github.com/guy-david updated 
https://github.com/llvm/llvm-project/pull/181110

>From 738e835037ef59e2693cbf046f45eab459d44fce Mon Sep 17 00:00:00 2001
From: Guy David <[email protected]>
Date: Wed, 11 Feb 2026 16:20:41 +0200
Subject: [PATCH] [ValueTracking] Extend computeConstantRange for add/sub,
 sext/zext/trunc

Recursively compute operand ranges for add/sub (and disjoint or, which
is handled as an add) and propagate ranges through sext/zext/trunc.
For add/sub, the computed range is intersected with any existing range
from setLimitsForBinOp, and NSW/NUW flags are used via addWithNoWrap/
subWithNoWrap to tighten bounds.

The motivation is to enable further folding of reduce.add expressions
in comparisons, where the result range can be bounded by the input
element ranges.
---
 clang/test/CodeGen/isfpclass.c                |  4 +-
 clang/test/Headers/__clang_hip_math.hip       | 48 +++++-----
 clang/test/Headers/wasm.c                     | 32 +++----
 llvm/lib/Analysis/ValueTracking.cpp           | 67 +++++++++++--
 llvm/test/Analysis/BasicAA/range.ll           | 66 +++++++++++++
 llvm/test/CodeGen/AMDGPU/sdiv64.ll            | 27 ++----
 llvm/test/CodeGen/AMDGPU/srem64.ll            | 95 +++++++++----------
 llvm/test/CodeGen/AMDGPU/udiv64.ll            | 34 +++----
 llvm/test/CodeGen/AMDGPU/urem64.ll            | 50 +++++-----
 llvm/test/CodeGen/PowerPC/add_cmp.ll          | 12 +--
 llvm/test/Transforms/InstCombine/add.ll       |  4 +-
 llvm/test/Transforms/InstCombine/fls.ll       |  2 +-
 llvm/test/Transforms/InstCombine/icmp-add.ll  |  3 +-
 llvm/test/Transforms/InstCombine/sadd_sat.ll  | 10 +-
 .../InstCombine/saturating-add-sub.ll         |  5 +-
 llvm/unittests/Analysis/ValueTrackingTest.cpp | 91 ++++++++++++++++++
 16 files changed, 363 insertions(+), 187 deletions(-)

diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c
index 4c6d556e008e5..1465b43149fcc 100644
--- a/clang/test/CodeGen/isfpclass.c
+++ b/clang/test/CodeGen/isfpclass.c
@@ -136,7 +136,7 @@ typedef double __attribute__((ext_vector_type(4))) double4;
 typedef int __attribute__((ext_vector_type(4))) int4;
 typedef long __attribute__((ext_vector_type(4))) long4;
 
-// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> 
@check_isfpclass_nan_v4f32(
+// CHECK-LABEL: define dso_local noundef range(i32 0, 2) <4 x i32> 
@check_isfpclass_nan_v4f32(
 // CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer
@@ -147,7 +147,7 @@ int4 check_isfpclass_nan_v4f32(float4 x) {
   return __builtin_isfpclass(x, 3 /*NaN*/);
 }
 
-// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> 
@check_isfpclass_nan_strict_v4f32(
+// CHECK-LABEL: define dso_local noundef range(i32 0, 2) <4 x i32> 
@check_isfpclass_nan_strict_v4f32(
 // CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 
x float> [[X]], i32 3) #[[ATTR5]]
diff --git a/clang/test/Headers/__clang_hip_math.hip 
b/clang/test/Headers/__clang_hip_math.hip
index 68a8666e41856..0a9c757aabf55 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -2653,7 +2653,7 @@ extern "C" __device__ int test_ilogb(double x) {
   return ilogb(x);
 }
 
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef(
 // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2666,7 +2666,7 @@ extern "C" __device__ int test_ilogb(double x) {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    ret i32 1
 //
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef(
 // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2674,7 +2674,7 @@ extern "C" __device__ int test_ilogb(double x) {
 // APPROX-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 // APPROX-NEXT:    ret i32 [[CONV]]
 //
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef(
 // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2682,7 +2682,7 @@ extern "C" __device__ int test_ilogb(double x) {
 // NCRDIV-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 // NCRDIV-NEXT:    ret i32 [[CONV]]
 //
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finitef(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 
@test___finitef(
 // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) 
#[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call addrspace(4) float 
@llvm.fabs.f32(float [[X]])
@@ -2694,7 +2694,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
   return __finitef(x);
 }
 
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite(
 // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double 
[[X]])
@@ -2707,7 +2707,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    ret i32 1
 //
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite(
 // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2715,7 +2715,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
 // APPROX-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 // APPROX-NEXT:    ret i32 [[CONV]]
 //
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite(
 // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2723,7 +2723,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
 // NCRDIV-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 // NCRDIV-NEXT:    ret i32 [[CONV]]
 //
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finite(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 
@test___finite(
 // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) 
#[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call addrspace(4) double 
@llvm.fabs.f64(double [[X]])
@@ -2735,7 +2735,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
   return __finite(x);
 }
 
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff(
 // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2748,7 +2748,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    ret i32 0
 //
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff(
 // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2756,7 +2756,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
 // APPROX-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 // APPROX-NEXT:    ret i32 [[CONV]]
 //
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff(
 // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2764,7 +2764,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
 // NCRDIV-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 // NCRDIV-NEXT:    ret i32 [[CONV]]
 //
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinff(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 
@test___isinff(
 // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) 
#[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call addrspace(4) float 
@llvm.fabs.f32(float [[X]])
@@ -2776,7 +2776,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
   return __isinff(x);
 }
 
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf(
 // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double 
[[X]])
@@ -2789,7 +2789,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    ret i32 0
 //
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf(
 // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2797,7 +2797,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
 // APPROX-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 // APPROX-NEXT:    ret i32 [[CONV]]
 //
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf(
 // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2805,7 +2805,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
 // NCRDIV-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 // NCRDIV-NEXT:    ret i32 [[CONV]]
 //
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinf(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 
@test___isinf(
 // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) 
#[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call addrspace(4) double 
@llvm.fabs.f64(double [[X]])
@@ -2817,7 +2817,7 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) {
   return __isinf(x);
 }
 
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf(
 // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
@@ -2829,21 +2829,21 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    ret i32 0
 //
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf(
 // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
 // APPROX-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i32
 // APPROX-NEXT:    ret i32 [[CONV]]
 //
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf(
 // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
 // NCRDIV-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i32
 // NCRDIV-NEXT:    ret i32 [[CONV]]
 //
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnanf(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 
@test___isnanf(
 // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) 
#[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
@@ -2854,7 +2854,7 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) {
   return __isnanf(x);
 }
 
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan(
 // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
@@ -2866,21 +2866,21 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    ret i32 0
 //
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan(
 // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
 // APPROX-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i32
 // APPROX-NEXT:    ret i32 [[CONV]]
 //
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan(
 // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
 // NCRDIV-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i32
 // NCRDIV-NEXT:    ret i32 [[CONV]]
 //
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnan(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 
@test___isnan(
 // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) 
#[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index 2545a014e4340..fdce091fe640e 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1234,7 +1234,7 @@ v128_t test_u16x8_ge(v128_t a, v128_t b) {
   return wasm_u16x8_ge(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_eq(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_i32x4_eq(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
@@ -1245,7 +1245,7 @@ v128_t test_i32x4_eq(v128_t a, v128_t b) {
   return wasm_i32x4_eq(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ne(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_i32x4_ne(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <4 x i32> [[A]], [[B]]
@@ -1256,7 +1256,7 @@ v128_t test_i32x4_ne(v128_t a, v128_t b) {
   return wasm_i32x4_ne(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_lt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_i32x4_lt(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <4 x i32> [[A]], [[B]]
@@ -1267,7 +1267,7 @@ v128_t test_i32x4_lt(v128_t a, v128_t b) {
   return wasm_i32x4_lt(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_lt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_u32x4_lt(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <4 x i32> [[A]], [[B]]
@@ -1278,7 +1278,7 @@ v128_t test_u32x4_lt(v128_t a, v128_t b) {
   return wasm_u32x4_lt(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_gt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_i32x4_gt(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A]], [[B]]
@@ -1289,7 +1289,7 @@ v128_t test_i32x4_gt(v128_t a, v128_t b) {
   return wasm_i32x4_gt(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_gt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_u32x4_gt(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A]], [[B]]
@@ -1300,7 +1300,7 @@ v128_t test_u32x4_gt(v128_t a, v128_t b) {
   return wasm_u32x4_gt(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_le(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_i32x4_le(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <4 x i32> [[A]], [[B]]
@@ -1311,7 +1311,7 @@ v128_t test_i32x4_le(v128_t a, v128_t b) {
   return wasm_i32x4_le(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_le(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_u32x4_le(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <4 x i32> [[A]], [[B]]
@@ -1322,7 +1322,7 @@ v128_t test_u32x4_le(v128_t a, v128_t b) {
   return wasm_u32x4_le(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ge(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_i32x4_ge(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <4 x i32> [[A]], [[B]]
@@ -1333,7 +1333,7 @@ v128_t test_i32x4_ge(v128_t a, v128_t b) {
   return wasm_i32x4_ge(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_ge(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_u32x4_ge(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <4 x i32> [[A]], [[B]]
@@ -1428,7 +1428,7 @@ v128_t test_i64x2_ge(v128_t a, v128_t b) {
   return wasm_i64x2_ge(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_eq(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_f32x4_eq(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1441,7 +1441,7 @@ v128_t test_f32x4_eq(v128_t a, v128_t b) {
   return wasm_f32x4_eq(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ne(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_f32x4_ne(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1454,7 +1454,7 @@ v128_t test_f32x4_ne(v128_t a, v128_t b) {
   return wasm_f32x4_ne(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_lt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_f32x4_lt(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1467,7 +1467,7 @@ v128_t test_f32x4_lt(v128_t a, v128_t b) {
   return wasm_f32x4_lt(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_gt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_f32x4_gt(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1480,7 +1480,7 @@ v128_t test_f32x4_gt(v128_t a, v128_t b) {
   return wasm_f32x4_gt(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_le(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_f32x4_le(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1493,7 +1493,7 @@ v128_t test_f32x4_le(v128_t a, v128_t b) {
   return wasm_f32x4_le(a, b);
 }
 
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ge(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> 
@test_f32x4_ge(
 // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
diff --git a/llvm/lib/Analysis/ValueTracking.cpp 
b/llvm/lib/Analysis/ValueTracking.cpp
index 4d8f3153b726f..223917a9fb7aa 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -9800,10 +9800,15 @@ std::optional<bool> 
llvm::isImpliedByDomCondition(CmpPredicate Pred,
   return std::nullopt;
 }
 
-static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
-                              APInt &Upper, const InstrInfoQuery &IIQ,
-                              bool PreferSignedRange) {
-  unsigned Width = Lower.getBitWidth();
+static ConstantRange getRangeForBinOp(const BinaryOperator &BO,
+                                      bool PreferSignedRange, bool 
UseInstrInfo,
+                                      AssumptionCache *AC,
+                                      const Instruction *CtxI,
+                                      const DominatorTree *DT, unsigned Depth) 
{
+  unsigned Width = BO.getType()->getScalarSizeInBits();
+  InstrInfoQuery IIQ(UseInstrInfo);
+  APInt Lower = APInt(Width, 0);
+  APInt Upper = APInt(Width, 0);
   const APInt *C;
   switch (BO.getOpcode()) {
   case Instruction::Sub:
@@ -10023,6 +10028,36 @@ static void setLimitsForBinOp(const BinaryOperator 
&BO, APInt &Lower,
   default:
     break;
   }
+
+  ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
+  unsigned Opc = BO.getOpcode();
+  bool IsDisjointOr =
+      Opc == Instruction::Or && cast<PossiblyDisjointInst>(&BO)->isDisjoint();
+  if (Opc == Instruction::Add || Opc == Instruction::Sub || IsDisjointOr) {
+    // Limit recursion depth more aggressively for binary operations.
+    unsigned NewDepth = std::max(Depth + 1, MaxAnalysisRecursionDepth - 1);
+    ConstantRange LHS =
+        computeConstantRange(BO.getOperand(0), PreferSignedRange, UseInstrInfo,
+                             AC, CtxI, DT, NewDepth);
+    ConstantRange RHS =
+        computeConstantRange(BO.getOperand(1), PreferSignedRange, UseInstrInfo,
+                             AC, CtxI, DT, NewDepth);
+    unsigned NoWrapKind = 0;
+    // Only Add and Sub have no-wrap flags, not disjoint Or.
+    if (!IsDisjointOr) {
+      if (IIQ.hasNoUnsignedWrap(&BO))
+        NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap;
+      if (IIQ.hasNoSignedWrap(&BO))
+        NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap;
+    }
+    // Disjoint OR is semantically equivalent to Add.
+    ConstantRange OpCR = Opc == Instruction::Sub
+                             ? LHS.subWithNoWrap(RHS, NoWrapKind)
+                             : LHS.addWithNoWrap(RHS, NoWrapKind);
+    CR = CR.intersectWith(OpCR, PreferSignedRange ? ConstantRange::Signed
+                                                  : ConstantRange::Unsigned);
+  }
+  return CR;
 }
 
 static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II,
@@ -10219,11 +10254,25 @@ ConstantRange llvm::computeConstantRange(const Value 
*V, bool ForSigned,
   InstrInfoQuery IIQ(UseInstrInfo);
   ConstantRange CR = ConstantRange::getFull(BitWidth);
   if (auto *BO = dyn_cast<BinaryOperator>(V)) {
-    APInt Lower = APInt(BitWidth, 0);
-    APInt Upper = APInt(BitWidth, 0);
-    // TODO: Return ConstantRange.
-    setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
-    CR = ConstantRange::getNonEmpty(Lower, Upper);
+    CR = getRangeForBinOp(*BO, ForSigned, UseInstrInfo, AC, CtxI, DT, Depth);
+  } else if (isa<SExtInst>(V) || isa<ZExtInst>(V) || isa<TruncInst>(V)) {
+    auto *CastOp = cast<CastInst>(V);
+    ConstantRange OpCR =
+        computeConstantRange(CastOp->getOperand(0), ForSigned, UseInstrInfo, 
AC,
+                             CtxI, DT, Depth + 1);
+    switch (CastOp->getOpcode()) {
+    case Instruction::SExt:
+      CR = OpCR.signExtend(BitWidth);
+      break;
+    case Instruction::ZExt:
+      CR = OpCR.zeroExtend(BitWidth);
+      break;
+    case Instruction::Trunc:
+      CR = OpCR.truncate(BitWidth);
+      break;
+    default:
+      llvm_unreachable("Unexpected cast opcode");
+    }
   } else if (auto *II = dyn_cast<IntrinsicInst>(V))
     CR = getRangeForIntrinsic(*II, UseInstrInfo);
   else if (auto *SI = dyn_cast<SelectInst>(V)) {
diff --git a/llvm/test/Analysis/BasicAA/range.ll 
b/llvm/test/Analysis/BasicAA/range.ll
index e5dfb60c8b878..a41fd63ee52f6 100644
--- a/llvm/test/Analysis/BasicAA/range.ll
+++ b/llvm/test/Analysis/BasicAA/range.ll
@@ -271,6 +271,72 @@ entry:
   ret i32 %load_
 }
 
+; CHECK-LABEL: Function: zext_propagate_range
+; CHECK: NoAlias: i32* %gep, i32* %gep128
+define void @zext_propagate_range(ptr %p, i8 %idx) {
+  %narrow = and i8 %idx, 127
+  %wide = zext i8 %narrow to i64
+  %gep = getelementptr i32, ptr %p, i64 %wide
+  %gep128 = getelementptr i32, ptr %p, i64 128
+  load i32, ptr %gep
+  load i32, ptr %gep128
+  ret void
+}
+
+; CHECK-LABEL: Function: sext_propagate_range
+; CHECK: NoAlias: i32* %gep, i32* %gep128
+define void @sext_propagate_range(ptr %p, i8 %idx) {
+  %clamped = and i8 %idx, 100
+  %wide = sext i8 %clamped to i64
+  %gep = getelementptr i32, ptr %p, i64 %wide
+  %gep128 = getelementptr i32, ptr %p, i64 128
+  load i32, ptr %gep
+  load i32, ptr %gep128
+  ret void
+}
+
+; CHECK-LABEL: Function: zext_add_range
+; CHECK: NoAlias: i32* %gep, i32* %gep512
+define void @zext_add_range(ptr %p, i8 %x, i8 %y) {
+  %ext.x = zext i8 %x to i64
+  %ext.y = zext i8 %y to i64
+  %sum = add i64 %ext.x, %ext.y
+  %gep = getelementptr i32, ptr %p, i64 %sum
+  %gep512 = getelementptr i32, ptr %p, i64 512
+  load i32, ptr %gep
+  load i32, ptr %gep512
+  ret void
+}
+
+; CHECK-LABEL: Function: zext_sub_range
+; CHECK: NoAlias: i32* %gep, i32* %gep256
+; CHECK: NoAlias: i32* %gep, i32* %gepneg256
+define void @zext_sub_range(ptr %p, i8 %x, i8 %y) {
+  %ext.x = zext i8 %x to i64
+  %ext.y = zext i8 %y to i64
+  %diff = sub i64 %ext.x, %ext.y
+  %gep = getelementptr i32, ptr %p, i64 %diff
+  %gep256 = getelementptr i32, ptr %p, i64 256
+  %gepneg256 = getelementptr i32, ptr %p, i64 -256
+  load i32, ptr %gep
+  load i32, ptr %gep256
+  load i32, ptr %gepneg256
+  ret void
+}
+
+; CHECK-LABEL: Function: trunc_propagate_range
+; CHECK: NoAlias: i32* %gep, i32* %gep64
+define void @trunc_propagate_range(ptr %p, i64 %idx) {
+  %clamped = and i64 %idx, 63
+  %narrow = trunc i64 %clamped to i8
+  %wide = zext i8 %narrow to i64
+  %gep = getelementptr i32, ptr %p, i64 %wide
+  %gep64 = getelementptr i32, ptr %p, i64 64
+  load i32, ptr %gep
+  load i32, ptr %gep64
+  ret void
+}
+
 declare void @llvm.assume(i1)
 
 !0 = !{ i32 0, i32 2 }
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll 
b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index fdb20f372ab8d..d3a027f99947b 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -1275,12 +1275,11 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr 
addrspace(1) %out, i64 %x)
 ; GCN-IR-NEXT:    s_addc_u32 s11, 0, -1
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[2:3], 0
 ; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[12:13], s[10:11], 63
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[14:15], s[10:11], 63
-; GCN-IR-NEXT:    s_or_b64 s[12:13], s[8:9], s[12:13]
-; GCN-IR-NEXT:    s_and_b64 s[8:9], s[12:13], exec
+; GCN-IR-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[8:9]
+; GCN-IR-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GCN-IR-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
 ; GCN-IR-NEXT:    s_cselect_b32 s8, 0, 24
-; GCN-IR-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
-; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[12:13]
 ; GCN-IR-NEXT:    s_mov_b32 s9, 0
 ; GCN-IR-NEXT:    s_cbranch_vccz .LBB10_5
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1462,13 +1461,11 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
+; GCN-IR-NEXT:    v_mov_b32_e32 v11, v10
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
+; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, 24, 0, s[4:5]
 ; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT:    v_mov_b32_e32 v11, v10
-; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB11_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1653,14 +1650,12 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0x8000
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[4:5]
-; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
 ; GCN-IR-NEXT:    v_mov_b32_e32 v11, v10
 ; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[4:5]
+; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
 ; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB12_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1755,12 +1750,10 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[0:1]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v9, v8
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[0:1]
-; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v3, v5, 0, s[4:5]
+; GCN-IR-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB13_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT:    v_add_i32_e32 v7, vcc, 1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll 
b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 02d2e6c1473ab..3bee2fa7da49a 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -1414,73 +1414,72 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr 
addrspace(1) %out, i64 %x)
 ; GCN-IR-LABEL: s_test_srem_k_num_i64:
 ; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
 ; GCN-IR-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-IR-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT:    s_ashr_i32 s8, s3, 31
-; GCN-IR-NEXT:    s_mov_b32 s9, s8
-; GCN-IR-NEXT:    s_xor_b64 s[2:3], s[2:3], s[8:9]
-; GCN-IR-NEXT:    s_sub_u32 s4, s2, s8
-; GCN-IR-NEXT:    s_subb_u32 s5, s3, s8
+; GCN-IR-NEXT:    s_ashr_i32 s6, s3, 31
+; GCN-IR-NEXT:    s_mov_b32 s7, s6
+; GCN-IR-NEXT:    s_xor_b64 s[2:3], s[2:3], s[6:7]
+; GCN-IR-NEXT:    s_sub_u32 s4, s2, s6
+; GCN-IR-NEXT:    s_subb_u32 s5, s3, s6
 ; GCN-IR-NEXT:    s_flbit_i32_b64 s14, s[4:5]
-; GCN-IR-NEXT:    s_add_u32 s2, s14, 0xffffffc5
-; GCN-IR-NEXT:    s_addc_u32 s3, 0, -1
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[4:5], 0
-; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[10:11], s[2:3], 63
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[12:13], s[2:3], 63
-; GCN-IR-NEXT:    s_or_b64 s[10:11], s[8:9], s[10:11]
-; GCN-IR-NEXT:    s_and_b64 s[8:9], s[10:11], exec
-; GCN-IR-NEXT:    s_cselect_b32 s8, 0, 24
-; GCN-IR-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[10:11]
-; GCN-IR-NEXT:    s_mov_b32 s9, 0
+; GCN-IR-NEXT:    s_add_u32 s8, s14, 0xffffffc5
+; GCN-IR-NEXT:    s_addc_u32 s9, 0, -1
+; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[4:5], 0
+; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[10:11], s[8:9], 63
+; GCN-IR-NEXT:    s_mov_b64 s[2:3], 0
+; GCN-IR-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
+; GCN-IR-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GCN-IR-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
+; GCN-IR-NEXT:    s_cselect_b32 s6, 0, 24
+; GCN-IR-NEXT:    s_mov_b32 s7, 0
 ; GCN-IR-NEXT:    s_cbranch_vccz .LBB10_5
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
-; GCN-IR-NEXT:    s_add_u32 s8, s2, 1
-; GCN-IR-NEXT:    s_addc_u32 s3, s3, 0
-; GCN-IR-NEXT:    s_cselect_b64 s[10:11], -1, 0
-; GCN-IR-NEXT:    s_sub_i32 s2, 63, s2
-; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[10:11]
-; GCN-IR-NEXT:    s_lshl_b64 s[2:3], 24, s2
+; GCN-IR-NEXT:    s_add_u32 s10, s8, 1
+; GCN-IR-NEXT:    s_addc_u32 s6, s9, 0
+; GCN-IR-NEXT:    s_cselect_b64 s[6:7], -1, 0
+; GCN-IR-NEXT:    s_sub_i32 s8, 63, s8
+; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[6:7]
+; GCN-IR-NEXT:    s_lshl_b64 s[6:7], 24, s8
 ; GCN-IR-NEXT:    s_cbranch_vccz .LBB10_4
 ; GCN-IR-NEXT:  ; %bb.2: ; %udiv-preheader
-; GCN-IR-NEXT:    s_lshr_b64 s[10:11], 24, s8
+; GCN-IR-NEXT:    s_lshr_b64 s[10:11], 24, s10
 ; GCN-IR-NEXT:    s_add_u32 s12, s4, -1
 ; GCN-IR-NEXT:    s_addc_u32 s13, s5, -1
 ; GCN-IR-NEXT:    s_sub_u32 s14, 58, s14
 ; GCN-IR-NEXT:    s_subb_u32 s15, 0, 0
 ; GCN-IR-NEXT:    s_mov_b64 s[8:9], 0
-; GCN-IR-NEXT:    s_mov_b32 s7, 0
+; GCN-IR-NEXT:    s_mov_b32 s3, 0
 ; GCN-IR-NEXT:  .LBB10_3: ; %udiv-do-while
 ; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-IR-NEXT:    s_lshl_b64 s[10:11], s[10:11], 1
-; GCN-IR-NEXT:    s_lshr_b32 s6, s3, 31
-; GCN-IR-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
-; GCN-IR-NEXT:    s_or_b64 s[10:11], s[10:11], s[6:7]
-; GCN-IR-NEXT:    s_or_b64 s[2:3], s[8:9], s[2:3]
-; GCN-IR-NEXT:    s_sub_u32 s6, s12, s10
-; GCN-IR-NEXT:    s_subb_u32 s6, s13, s11
-; GCN-IR-NEXT:    s_ashr_i32 s8, s6, 31
+; GCN-IR-NEXT:    s_lshr_b32 s2, s7, 31
+; GCN-IR-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
+; GCN-IR-NEXT:    s_or_b64 s[10:11], s[10:11], s[2:3]
+; GCN-IR-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
+; GCN-IR-NEXT:    s_sub_u32 s2, s12, s10
+; GCN-IR-NEXT:    s_subb_u32 s2, s13, s11
+; GCN-IR-NEXT:    s_ashr_i32 s8, s2, 31
 ; GCN-IR-NEXT:    s_mov_b32 s9, s8
-; GCN-IR-NEXT:    s_and_b32 s6, s8, 1
+; GCN-IR-NEXT:    s_and_b32 s2, s8, 1
 ; GCN-IR-NEXT:    s_and_b64 s[16:17], s[8:9], s[4:5]
 ; GCN-IR-NEXT:    s_sub_u32 s10, s10, s16
 ; GCN-IR-NEXT:    s_subb_u32 s11, s11, s17
 ; GCN-IR-NEXT:    s_add_u32 s14, s14, 1
 ; GCN-IR-NEXT:    s_addc_u32 s15, s15, 0
 ; GCN-IR-NEXT:    s_cselect_b64 s[16:17], -1, 0
-; GCN-IR-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GCN-IR-NEXT:    s_mov_b64 s[8:9], s[2:3]
 ; GCN-IR-NEXT:    s_and_b64 vcc, exec, s[16:17]
 ; GCN-IR-NEXT:    s_cbranch_vccz .LBB10_3
 ; GCN-IR-NEXT:  .LBB10_4: ; %Flow6
-; GCN-IR-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
-; GCN-IR-NEXT:    s_or_b64 s[8:9], s[6:7], s[2:3]
+; GCN-IR-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
+; GCN-IR-NEXT:    s_or_b64 s[6:7], s[2:3], s[6:7]
 ; GCN-IR-NEXT:  .LBB10_5: ; %udiv-end
-; GCN-IR-NEXT:    v_mov_b32_e32 v0, s8
+; GCN-IR-NEXT:    v_mov_b32_e32 v0, s6
 ; GCN-IR-NEXT:    v_mul_hi_u32 v0, s4, v0
-; GCN-IR-NEXT:    s_mul_i32 s6, s4, s9
-; GCN-IR-NEXT:    s_mul_i32 s5, s5, s8
-; GCN-IR-NEXT:    s_mul_i32 s4, s4, s8
-; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, s6, v0
+; GCN-IR-NEXT:    s_mul_i32 s7, s4, s7
+; GCN-IR-NEXT:    s_mul_i32 s5, s5, s6
+; GCN-IR-NEXT:    s_mul_i32 s4, s4, s6
+; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, s7, v0
 ; GCN-IR-NEXT:    v_add_i32_e32 v1, vcc, s5, v0
 ; GCN-IR-NEXT:    v_sub_i32_e64 v0, vcc, 24, s4
 ; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
@@ -1612,12 +1611,10 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
+; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, 24, 0, s[4:5]
 ; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB11_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1801,13 +1798,11 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0x8000
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
+; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[4:5]
 ; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB12_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1908,12 +1903,10 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[2:3]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v11, v10
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v5, v1, 0, s[4:5]
+; GCN-IR-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, v0, 0, s[4:5]
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB13_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll 
b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index 1c50f930facba..3f5be80b1efbd 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -912,12 +912,11 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr 
addrspace(1) %out, i64 %x)
 ; GCN-IR-NEXT:    s_addc_u32 s9, 0, -1
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[2:3], 0
 ; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[10:11], s[8:9], 63
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[12:13], s[8:9], 63
-; GCN-IR-NEXT:    s_or_b64 s[10:11], s[6:7], s[10:11]
-; GCN-IR-NEXT:    s_and_b64 s[6:7], s[10:11], exec
+; GCN-IR-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
+; GCN-IR-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GCN-IR-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
 ; GCN-IR-NEXT:    s_cselect_b32 s6, 0, 24
-; GCN-IR-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[10:11]
 ; GCN-IR-NEXT:    s_mov_b32 s7, 0
 ; GCN-IR-NEXT:    s_cbranch_vccz .LBB8_5
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1083,13 +1082,11 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_addc_u32_e64 v5, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[4:5]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[4:5]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v3, 0x8000
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
+; GCN-IR-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[4:5]
 ; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB9_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1173,12 +1170,10 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[4:5]
-; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v2, v1, 0, s[4:5]
+; GCN-IR-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v3, v0, 0, s[4:5]
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB10_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
@@ -1277,13 +1272,12 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr 
addrspace(1) %out, i64 %x)
 ; GCN-IR-NEXT:    s_subb_u32 s9, 0, 0
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[2:3], 0
 ; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[6:7], s[8:9], 63
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[12:13], s[8:9], 63
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GCN-IR-NEXT:    s_and_b64 s[6:7], s[4:5], exec
-; GCN-IR-NEXT:    s_cselect_b32 s7, 0, s3
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GCN-IR-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
 ; GCN-IR-NEXT:    s_cselect_b32 s6, 0, s2
-; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], s[12:13]
-; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
+; GCN-IR-NEXT:    s_cselect_b32 s7, 0, s3
 ; GCN-IR-NEXT:    s_mov_b64 s[4:5], 0
 ; GCN-IR-NEXT:    s_cbranch_vccz .LBB11_5
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1372,12 +1366,10 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[4:5]
-; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v2, v1, 0, s[4:5]
+; GCN-IR-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v3, v0, 0, s[4:5]
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB12_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll 
b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 28e6627b87413..b6608b9f48a7a 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -926,12 +926,11 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr 
addrspace(1) %out, i64 %x)
 ; GCN-IR-NEXT:    s_addc_u32 s9, 0, -1
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[2:3], 0
 ; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[10:11], s[8:9], 63
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[12:13], s[8:9], 63
-; GCN-IR-NEXT:    s_or_b64 s[10:11], s[6:7], s[10:11]
-; GCN-IR-NEXT:    s_and_b64 s[6:7], s[10:11], exec
+; GCN-IR-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
+; GCN-IR-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GCN-IR-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
 ; GCN-IR-NEXT:    s_cselect_b32 s6, 0, 24
-; GCN-IR-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[10:11]
 ; GCN-IR-NEXT:    s_mov_b32 s7, 0
 ; GCN-IR-NEXT:    s_cbranch_vccz .LBB6_5
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1042,13 +1041,12 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr 
addrspace(1) %out, i64 %x)
 ; GCN-IR-NEXT:    s_subb_u32 s9, 0, 0
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[2:3], 0
 ; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[6:7], s[8:9], 63
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[12:13], s[8:9], 63
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GCN-IR-NEXT:    s_and_b64 s[6:7], s[4:5], exec
-; GCN-IR-NEXT:    s_cselect_b32 s7, 0, s3
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], exec
+; GCN-IR-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
 ; GCN-IR-NEXT:    s_cselect_b32 s6, 0, s2
-; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], s[12:13]
-; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
+; GCN-IR-NEXT:    s_cselect_b32 s7, 0, s3
 ; GCN-IR-NEXT:    s_mov_b64 s[4:5], 0
 ; GCN-IR-NEXT:    s_cbranch_vccz .LBB7_5
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1219,13 +1217,11 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0x8000
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
+; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[4:5]
 ; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB8_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
@@ -1310,22 +1306,20 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e64 v2, s[4:5], 32, v2
 ; GCN-IR-NEXT:    v_ffbh_u32_e32 v3, v1
 ; GCN-IR-NEXT:    v_min_u32_e32 v8, v2, v3
-; GCN-IR-NEXT:    v_sub_i32_e64 v2, s[4:5], 48, v8
-; GCN-IR-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GCN-IR-NEXT:    v_sub_i32_e64 v4, s[4:5], 48, v8
+; GCN-IR-NEXT:    v_subb_u32_e64 v5, s[4:5], 0, 0, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
-; GCN-IR-NEXT:    v_cndmask_b32_e64 v5, v1, 0, s[4:5]
-; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, v0, 0, s[4:5]
-; GCN-IR-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v3, v1, 0, s[4:5]
+; GCN-IR-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
+; GCN-IR-NEXT:    v_cndmask_b32_e64 v2, v0, 0, s[4:5]
+; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
 ; GCN-IR-NEXT:    s_cbranch_execz .LBB9_6
 ; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
-; GCN-IR-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
-; GCN-IR-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-IR-NEXT:    v_sub_i32_e64 v2, s[4:5], 63, v2
+; GCN-IR-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; GCN-IR-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
+; GCN-IR-NEXT:    v_sub_i32_e64 v2, s[4:5], 63, v4
 ; GCN-IR-NEXT:    v_lshl_b64 v[2:3], v[0:1], v2
 ; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
@@ -1369,11 +1363,11 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:  .LBB9_5: ; %Flow4
 ; GCN-IR-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-IR-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
-; GCN-IR-NEXT:    v_or_b32_e32 v5, v5, v3
-; GCN-IR-NEXT:    v_or_b32_e32 v4, v4, v2
+; GCN-IR-NEXT:    v_or_b32_e32 v3, v5, v3
+; GCN-IR-NEXT:    v_or_b32_e32 v2, v4, v2
 ; GCN-IR-NEXT:  .LBB9_6: ; %Flow5
 ; GCN-IR-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GCN-IR-NEXT:    v_lshl_b64 v[2:3], v[4:5], 15
+; GCN-IR-NEXT:    v_lshl_b64 v[2:3], v[2:3], 15
 ; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/PowerPC/add_cmp.ll 
b/llvm/test/CodeGen/PowerPC/add_cmp.ll
index cbe16a498a538..c5cc071e0183d 100644
--- a/llvm/test/CodeGen/PowerPC/add_cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/add_cmp.ll
@@ -30,27 +30,27 @@ entry:
 
 define zeroext i1 @addiCmpiUnsignedOverflow(i32 zeroext %x) {
 entry:
-  %add = add nuw i32 110, %x 
-  %cmp = icmp ugt i32 %add, 100
+  %add = add nuw i32 110, %x
+  %cmp = icmp ugt i32 %add, 200
   ret i1 %cmp
 
 ; CHECK: === addiCmpiUnsignedOverflow
 ; CHECK: Optimized lowered selection DAG: %bb.0 
'addiCmpiUnsignedOverflow:entry'
 ; CHECK:   [[REG1:t[0-9]+]]: i32 = truncate {{t[0-9]+}}
 ; CHECK:   [[REG2:t[0-9]+]]: i32 = add nuw [[REG1]], Constant:i32<110>
-; CHECK:   {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i32<100>, setugt:ch
+; CHECK:   {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i32<200>, setugt:ch
 }
 
 define zeroext i1 @addiCmpiSignedOverflow(i16 signext %x) {
 entry:
-  %add = add nsw i16 16, %x 
-  %cmp = icmp sgt i16 %add, -32767
+  %add = add nsw i16 16, %x
+  %cmp = icmp sgt i16 %add, 30
   ret i1 %cmp
 
 ; CHECK: === addiCmpiSignedOverflow
 ; CHECK: Optimized lowered selection DAG: %bb.0 'addiCmpiSignedOverflow:entry'
 ; CHECK:   [[REG1:t[0-9]+]]: i16 = truncate {{t[0-9]+}}
 ; CHECK:   [[REG2:t[0-9]+]]: i16 = add nsw [[REG1]], Constant:i16<16>
-; CHECK:   {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i16<-32767>, setgt:ch
+; CHECK:   {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i16<30>, setgt:ch
 }
 
diff --git a/llvm/test/Transforms/InstCombine/add.ll 
b/llvm/test/Transforms/InstCombine/add.ll
index aa68dfb540064..9d19ff1d37c26 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -3274,9 +3274,7 @@ define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x 
i8> %x) {
 define <2 x i32> @dec_zext_add_nonzero_vec_poison2(<2 x i8> %x) {
 ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison2(
 ; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8)
-; CHECK-NEXT:    [[A:%.*]] = add nsw <2 x i8> [[O]], splat (i8 -1)
-; CHECK-NEXT:    [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32>
-; CHECK-NEXT:    [[C:%.*]] = add nuw nsw <2 x i32> [[B]], <i32 1, i32 poison>
+; CHECK-NEXT:    [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %o = or <2 x i8> %x, <i8 8, i8 8>
diff --git a/llvm/test/Transforms/InstCombine/fls.ll 
b/llvm/test/Transforms/InstCombine/fls.ll
index 68bc0a2fc8a1d..ea757268259f5 100644
--- a/llvm/test/Transforms/InstCombine/fls.ll
+++ b/llvm/test/Transforms/InstCombine/fls.ll
@@ -33,7 +33,7 @@ define i32 @flsnotconst(i64 %z) {
 ; CHECK-LABEL: @flsnotconst(
 ; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 
[[Z:%.*]], i1 false)
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc nuw nsw i64 [[CTLZ]] to i32
-; CHECK-NEXT:    [[GOO:%.*]] = sub nsw i32 64, [[TMP1]]
+; CHECK-NEXT:    [[GOO:%.*]] = sub nuw nsw i32 64, [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[GOO]]
 ;
   %goo = call i32 @flsl(i64 %z)
diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll 
b/llvm/test/Transforms/InstCombine/icmp-add.ll
index 1b66a50c26e59..94499c91b8f87 100644
--- a/llvm/test/Transforms/InstCombine/icmp-add.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
@@ -3160,7 +3160,8 @@ define i1 
@icmp_add_constant_with_constant_ult_to_slt_neg2(i8 range(i8 -4, 120)
 }
 
 ; Negative test: C2 is negative
-define i1 @icmp_add_constant_with_constant_ult_to_slt_neg3(i32 range(i32 -4, 
10) %x) {
+; Prevent constant fold by using the range [-10, 10).
+define i1 @icmp_add_constant_with_constant_ult_to_slt_neg3(i32 range(i32 -10, 
10) %x) {
 ; CHECK-LABEL: @icmp_add_constant_with_constant_ult_to_slt_neg3(
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ADD]], -6
diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll 
b/llvm/test/Transforms/InstCombine/sadd_sat.ll
index 6afb77d975b8c..3143d4addecc1 100644
--- a/llvm/test/Transforms/InstCombine/sadd_sat.ll
+++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll
@@ -824,11 +824,11 @@ entry:
 
 define i16 @or(i8 %X, i16 %Y) {
 ; CHECK-LABEL: @or(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i16 [[Y:%.*]] to i8
-; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[TMP1]], -16
-; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 
[[TMP2]])
-; CHECK-NEXT:    [[L12:%.*]] = sext i8 [[TMP3]] to i16
-; CHECK-NEXT:    ret i16 [[L12]]
+; CHECK-NEXT:    [[L12:%.*]] = sext i8 [[TMP3:%.*]] to i16
+; CHECK-NEXT:    [[CONV14:%.*]] = or i16 [[Y:%.*]], -16
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i16 [[L12]], [[CONV14]]
+; CHECK-NEXT:    [[L13:%.*]] = call i16 @llvm.smin.i16(i16 [[SUB]], i16 127)
+; CHECK-NEXT:    ret i16 [[L13]]
 ;
   %conv10 = sext i8 %X to i16
   %conv14 = or i16 %Y, 65520
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll 
b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index efa89db4af61a..dff1f09213864 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1111,8 +1111,7 @@ define <3 x i8> 
@test_vector_usub_add_nuw_no_ov_nonsplat1_poison(<3 x i8> %a) {
 ; Can be optimized if the add nuw RHS constant range handles non-splat vectors.
 define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat2(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat2(
-; CHECK-NEXT:    [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], <i8 10, i8 9>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], 
<2 x i8> splat (i8 9))
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[A:%.*]], <i8 1, i8 0>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %b = add nuw <2 x i8> %a, <i8 10, i8 9>
@@ -1188,7 +1187,7 @@ define <2 x i8> 
@test_vector_ssub_add_nsw_no_ov_nonsplat2(<2 x i8> %a, <2 x i8>
 ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat2(
 ; CHECK-NEXT:    [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 8>
 ; CHECK-NEXT:    [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7)
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> 
[[AA]], <2 x i8> [[BB]])
+; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %aa = add nsw <2 x i8> %a, <i8 7, i8 8>
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp 
b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index fa06b0caa6a64..ba70dded23d42 100644
--- a/llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -3540,6 +3540,97 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) {
     // If we don't know the value of x.2, we don't know the value of x.1.
     EXPECT_TRUE(CR1.isFullSet());
   }
+  {
+    auto M = parseModule(R"(
+  define void @test(i8 %x) {
+    %sext = sext i8 %x to i32
+    %zext = zext i8 %x to i32
+    ret void
+  })");
+    Function *F = M->getFunction("test");
+    AssumptionCache AC(*F);
+    Instruction *SExt = &findInstructionByName(F, "sext");
+    Instruction *ZExt = &findInstructionByName(F, "zext");
+    ConstantRange SExtCR = computeConstantRange(SExt, true, true, &AC, SExt);
+    EXPECT_EQ(SExtCR.getSignedMin().getSExtValue(), -128);
+    EXPECT_EQ(SExtCR.getSignedMax().getSExtValue(), 127);
+    ConstantRange ZExtCR = computeConstantRange(ZExt, false, true, &AC, ZExt);
+    EXPECT_EQ(ZExtCR.getUnsignedMin().getZExtValue(), 0u);
+    EXPECT_EQ(ZExtCR.getUnsignedMax().getZExtValue(), 255u);
+  }
+  {
+    auto M = parseModule(R"(
+  define i32 @test(i8 %x) {
+    %ext = sext i8 %x to i32
+    %add = add nsw i32 %ext, 10
+    ret i32 %add
+  })");
+    Function *F = M->getFunction("test");
+    AssumptionCache AC(*F);
+    Instruction *Add = &findInstructionByName(F, "add");
+    ConstantRange CR = computeConstantRange(Add, true, true, &AC, Add);
+    EXPECT_EQ(CR.getSignedMin().getSExtValue(), -118);
+    EXPECT_EQ(CR.getSignedMax().getSExtValue(), 137);
+  }
+  {
+    auto M = parseModule(R"(
+  define i32 @test(i8 %x, i8 %y) {
+    %ext.x = zext i8 %x to i32
+    %ext.y = zext i8 %y to i32
+    %sub = sub i32 %ext.x, %ext.y
+    ret i32 %sub
+  })");
+    Function *F = M->getFunction("test");
+    AssumptionCache AC(*F);
+    Instruction *Sub = &findInstructionByName(F, "sub");
+    ConstantRange CR = computeConstantRange(Sub, true, true, &AC, Sub);
+    EXPECT_EQ(CR.getSignedMin().getSExtValue(), -255);
+    EXPECT_EQ(CR.getSignedMax().getSExtValue(), 255);
+  }
+  {
+    // trunc
+    auto M = parseModule(R"(
+  define void @test(i32 %x) {
+    %narrow = trunc i32 %x to i8
+    ret void
+  })");
+    Function *F = M->getFunction("test");
+    AssumptionCache AC(*F);
+    Instruction *Trunc = &findInstructionByName(F, "narrow");
+    ConstantRange CR = computeConstantRange(Trunc, false, true, &AC, Trunc);
+    EXPECT_TRUE(CR.isFullSet());
+    EXPECT_EQ(CR.getBitWidth(), 8u);
+  }
+  {
+    // trunc with restricted input range
+    auto M = parseModule(R"(
+  define i8 @test(i32 %x) {
+    %clamped = and i32 %x, 127
+    %narrow = trunc i32 %clamped to i8
+    ret i8 %narrow
+  })");
+    Function *F = M->getFunction("test");
+    AssumptionCache AC(*F);
+    Instruction *Trunc = &findInstructionByName(F, "narrow");
+    ConstantRange CR = computeConstantRange(Trunc, false, true, &AC, Trunc);
+    EXPECT_EQ(CR.getUnsignedMin().getZExtValue(), 0u);
+    EXPECT_EQ(CR.getUnsignedMax().getZExtValue(), 127u);
+  }
+  {
+    auto M = parseModule(R"(
+  define i32 @test(i8 %x, i8 %y) {
+    %ext.x = zext i8 %x to i32
+    %ext.y = zext i8 %y to i32
+    %or = or disjoint i32 %ext.x, %ext.y
+    ret i32 %or
+  })");
+    Function *F = M->getFunction("test");
+    AssumptionCache AC(*F);
+    Instruction *Or = &findInstructionByName(F, "or");
+    ConstantRange CR = computeConstantRange(Or, false, true, &AC, Or);
+    EXPECT_EQ(CR.getUnsignedMin().getZExtValue(), 0u);
+    EXPECT_EQ(CR.getUnsignedMax().getZExtValue(), 510u);
+  }
 }
 
 struct FindAllocaForValueTestParams {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to