https://github.com/goldsteinn updated 
https://github.com/llvm/llvm-project/pull/88183

>From 31b373984bcbb51db9f1d1c939492515fb721c8d Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein....@gmail.com>
Date: Sat, 4 May 2024 18:12:34 -0500
Subject: [PATCH 1/5] [Inliner] Add tests for propagating more parameter
 attributes; NFC

---
 .../Inline/access-attributes-prop.ll          | 116 +++++++++++++++++-
 1 file changed, 114 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll 
b/llvm/test/Transforms/Inline/access-attributes-prop.ll
index ffd31fbe8ae107..125d3f963e1338 100644
--- a/llvm/test/Transforms/Inline/access-attributes-prop.ll
+++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll
@@ -46,7 +46,6 @@ define dso_local void @foo3_writable(ptr %p) {
   ret void
 }
 
-
 define dso_local void @foo1_bar_aligned64_deref512(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@foo1_bar_aligned64_deref512
 ; CHECK-SAME: (ptr [[P:%.*]]) {
@@ -322,6 +321,16 @@ define void @prop_param_nonnull_and_align(ptr %p) {
   ret void
 }
 
+define void @prop_param_nofree_and_align(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@prop_param_nofree_and_align
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @bar1(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo1(ptr nofree align 32 %p)
+  ret void
+}
+
 define void @prop_param_deref_align_no_update(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_no_update
 ; CHECK-SAME: (ptr [[P:%.*]]) {
@@ -528,7 +537,6 @@ define void @prop_no_conflict_writable(ptr %p) {
   ret void
 }
 
-
 define void @prop_no_conflict_writable2(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_no_conflict_writable2
 ; CHECK-SAME: (ptr [[P:%.*]]) {
@@ -539,3 +547,107 @@ define void @prop_no_conflict_writable2(ptr %p) {
   ret void
 }
 
+declare void @bar4(i32)
+
+define dso_local void @foo4_range_0_10(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@foo4_range_0_10
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @bar4(i32 range(i32 0, 10) %v)
+  ret void
+}
+
+define dso_local void @foo4_2_range_0_10(i32 range(i32 0, 10) %v) {
+; CHECK-LABEL: define {{[^@]+}}@foo4_2_range_0_10
+; CHECK-SAME: (i32 range(i32 0, 10) [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @bar4(i32 %v)
+  ret void
+}
+
+
+define dso_local void @foo4(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@foo4
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @bar4(i32 %v)
+  ret void
+}
+
+
+
+define void @prop_range_empty_intersect(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_intersect
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo4_range_0_10(i32 range(i32 11, 50) %v)
+  ret void
+}
+
+define void @prop_range_empty(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_empty
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo4(i32 range(i32 1, 0) %v)
+  ret void
+}
+
+define void @prop_range_empty_with_intersect(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_with_intersect
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo4_range_0_10(i32 range(i32 1, 0) %v)
+  ret void
+}
+
+define void @prop_range_intersect1(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect1
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo4_range_0_10(i32 range(i32 0, 9) %v)
+  ret void
+}
+
+define void @prop_range_intersect2(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect2
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo4_range_0_10(i32 range(i32 1, 9) %v)
+  ret void
+}
+
+define void @prop_range_intersect3(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect3
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo4_2_range_0_10(i32 range(i32 0, 11) %v)
+  ret void
+}
+
+define void @prop_range_direct(i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@prop_range_direct
+; CHECK-SAME: (i32 [[V:%.*]]) {
+; CHECK-NEXT:    call void @bar4(i32 [[V]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo4(i32 range(i32 1, 11) %v)
+  ret void
+}

>From 238dd3b7d8d9da5f09161df58106b635fd9cbe97 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein....@gmail.com>
Date: Sat, 4 May 2024 13:57:45 -0500
Subject: [PATCH 2/5] [Inliner] Propagate more attributes to params when
 inlining

Add support for propagating:
    - `dereferenceable`
    - `dereferenceable_or_null`
    - `align`
    - `nonnull`
    - `nofree`

These are only propagated when an operand of a callsite inside the
to-be-inlined function is exactly that function's parameter.
---
 .../test/CodeGen/attr-counted-by-pr88931.cpp  |  2 +-
 clang/test/OpenMP/bug57757.cpp                |  2 +-
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 74 +++++++++++++++----
 .../Inline/access-attributes-prop.ll          | 16 ++--
 .../Inline/assumptions-from-callsite-attrs.ll |  2 +-
 llvm/test/Transforms/Inline/byval.ll          |  4 +-
 6 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/clang/test/CodeGen/attr-counted-by-pr88931.cpp 
b/clang/test/CodeGen/attr-counted-by-pr88931.cpp
index 2a8cc1d07e50d9..6d0c46bbbe8f9c 100644
--- a/clang/test/CodeGen/attr-counted-by-pr88931.cpp
+++ b/clang/test/CodeGen/attr-counted-by-pr88931.cpp
@@ -13,7 +13,7 @@ void init(void * 
__attribute__((pass_dynamic_object_size(0))));
 // CHECK-LABEL: define dso_local void @_ZN3foo3barC1Ev(
 // CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(1) [[THIS:%.*]]) 
unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr 
noundef nonnull [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr 
noundef nonnull align 4 dereferenceable(1) [[THIS]], i64 noundef -1) 
#[[ATTR2:[0-9]+]]
 // CHECK-NEXT:    ret void
 //
 foo::bar::bar() {
diff --git a/clang/test/OpenMP/bug57757.cpp b/clang/test/OpenMP/bug57757.cpp
index e1f646e2b141a0..c4e309d7f566b5 100644
--- a/clang/test/OpenMP/bug57757.cpp
+++ b/clang/test/OpenMP/bug57757.cpp
@@ -39,7 +39,7 @@ void foo() {
 // CHECK-NEXT:    ]
 // CHECK:       .untied.jmp..i:
 // CHECK-NEXT:    store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], 
!alias.scope [[META13]], !noalias [[META17]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull 
@[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias [[META13]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull 
@[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP1]]), !noalias [[META13]]
 // CHECK-NEXT:    br label [[DOTOMP_OUTLINED__EXIT]]
 // CHECK:       .untied.next..i:
 // CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 
40
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp 
b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 1aae561d8817b5..45bccd0a041509 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1352,20 +1352,41 @@ static void AddParamAndFnBasicAttributes(const CallBase 
&CB,
   auto &Context = CalledFunction->getContext();
 
   // Collect valid attributes for all params.
-  SmallVector<AttrBuilder> ValidParamAttrs;
+  SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
   bool HasAttrToPropagate = false;
 
   for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
-    ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
+    ValidObjParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
+    ValidExactParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
     // Access attributes can be propagated to any param with the same 
underlying
     // object as the argument.
     if (CB.paramHasAttr(I, Attribute::ReadNone))
-      ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
+      ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
     if (CB.paramHasAttr(I, Attribute::ReadOnly))
-      ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
+      ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
     if (CB.paramHasAttr(I, Attribute::WriteOnly))
-      ValidParamAttrs.back().addAttribute(Attribute::WriteOnly);
-    HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
+      ValidObjParamAttrs.back().addAttribute(Attribute::WriteOnly);
+
+    // Attributes we can only propagate if the exact parameter is forwarded.
+
+    // We can propagate both poison generating and UB generating attributes
+    // without any extra checks. The only attribute that is tricky to propagate
+    // is `noundef` (skipped for now) as that can create new UB where previous
+    // behavior was just using a poison value.
+    if (auto DerefBytes = CB.getParamDereferenceableBytes(I))
+      ValidExactParamAttrs.back().addDereferenceableAttr(DerefBytes);
+    if (auto DerefOrNullBytes = CB.getParamDereferenceableOrNullBytes(I))
+      ValidExactParamAttrs.back().addDereferenceableOrNullAttr(
+          DerefOrNullBytes);
+    if (CB.paramHasAttr(I, Attribute::NoFree))
+      ValidExactParamAttrs.back().addAttribute(Attribute::NoFree);
+    if (CB.paramHasAttr(I, Attribute::NonNull))
+      ValidExactParamAttrs.back().addAttribute(Attribute::NonNull);
+    if (auto Align = CB.getParamAlign(I))
+      ValidExactParamAttrs.back().addAlignmentAttr(Align);
+
+    HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
+    HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
   }
 
   // Won't be able to propagate anything.
@@ -1383,15 +1404,42 @@ static void AddParamAndFnBasicAttributes(const CallBase 
&CB,
       AttributeList AL = NewInnerCB->getAttributes();
       for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
         // Check if the underlying value for the parameter is an argument.
-        const Value *UnderlyingV =
-            getUnderlyingObject(InnerCB->getArgOperand(I));
-        const Argument *Arg = dyn_cast<Argument>(UnderlyingV);
-        if (!Arg)
-          continue;
+        const Argument *Arg = dyn_cast<Argument>(InnerCB->getArgOperand(I));
+        unsigned ArgNo;
+        if (Arg) {
+          ArgNo = Arg->getArgNo();
+          // For dereferenceable, dereferenceable_or_null, align, etc...
+          // we don't want to propagate if the existing param has the same
+          // attribute with "better" constraints. So, only remove from the
+          // existing AL if the region of the existing param is smaller than
+          // what we can propagate. AttributeList's merge API honours the
+          // already existing attribute value so we choose the "better"
+          // attribute by removing if the existing one is worse.
+          if (AL.getParamDereferenceableBytes(I) <
+              ValidExactParamAttrs[ArgNo].getDereferenceableBytes())
+            AL =
+                AL.removeParamAttribute(Context, I, 
Attribute::Dereferenceable);
+          if (AL.getParamDereferenceableOrNullBytes(I) <
+              ValidExactParamAttrs[ArgNo].getDereferenceableOrNullBytes())
+            AL = AL.removeParamAttribute(Context, I,
+                                         Attribute::DereferenceableOrNull);
+          if (AL.getParamAlignment(I).valueOrOne() <
+              ValidExactParamAttrs[ArgNo].getAlignment().valueOrOne())
+            AL = AL.removeParamAttribute(Context, I, Attribute::Alignment);
+
+          AL = AL.addParamAttributes(Context, I, ValidExactParamAttrs[ArgNo]);
+
+        } else {
+          const Value *UnderlyingV =
+              getUnderlyingObject(InnerCB->getArgOperand(I));
+          Arg = dyn_cast<Argument>(UnderlyingV);
+          if (!Arg)
+            continue;
+          ArgNo = Arg->getArgNo();
+        }
 
-        unsigned ArgNo = Arg->getArgNo();
         // If so, propagate its access attributes.
-        AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]);
+        AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
         // We can have conflicting attributes from the inner callsite and
         // to-be-inlined callsite. In that case, choose the most
         // restrictive.
diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll 
b/llvm/test/Transforms/Inline/access-attributes-prop.ll
index 125d3f963e1338..f3c656be00f59b 100644
--- a/llvm/test/Transforms/Inline/access-attributes-prop.ll
+++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll
@@ -294,7 +294,7 @@ define void @prop_param_callbase_def_1x_partial_3(ptr %p, 
ptr %p2) {
 define void @prop_deref(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_deref
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @bar1(ptr [[P]])
+; CHECK-NEXT:    call void @bar1(ptr dereferenceable(16) [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo1(ptr dereferenceable(16) %p)
@@ -304,7 +304,7 @@ define void @prop_deref(ptr %p) {
 define void @prop_deref_or_null(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_deref_or_null
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @bar1(ptr [[P]])
+; CHECK-NEXT:    call void @bar1(ptr dereferenceable_or_null(256) [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo1(ptr dereferenceable_or_null(256) %p)
@@ -314,7 +314,7 @@ define void @prop_deref_or_null(ptr %p) {
 define void @prop_param_nonnull_and_align(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_param_nonnull_and_align
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @bar1(ptr [[P]])
+; CHECK-NEXT:    call void @bar1(ptr nonnull align 32 [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo1(ptr nonnull align 32 %p)
@@ -324,7 +324,7 @@ define void @prop_param_nonnull_and_align(ptr %p) {
 define void @prop_param_nofree_and_align(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_param_nofree_and_align
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @bar1(ptr [[P]])
+; CHECK-NEXT:    call void @bar1(ptr nofree align 32 [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo1(ptr nofree align 32 %p)
@@ -334,7 +334,7 @@ define void @prop_param_nofree_and_align(ptr %p) {
 define void @prop_param_deref_align_no_update(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_no_update
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @bar1(ptr align 64 dereferenceable(512) [[P]])
+; CHECK-NEXT:    call void @bar1(ptr align 4 dereferenceable(64) [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo1_bar_aligned64_deref512(ptr align 4 dereferenceable(64) %p)
@@ -344,7 +344,7 @@ define void @prop_param_deref_align_no_update(ptr %p) {
 define void @prop_param_deref_align_update(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_update
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @bar1(ptr align 64 dereferenceable(512) [[P]])
+; CHECK-NEXT:    call void @bar1(ptr align 128 dereferenceable(1024) [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo1_bar_aligned64_deref512(ptr align 128 dereferenceable(1024) 
%p)
@@ -354,7 +354,7 @@ define void @prop_param_deref_align_update(ptr %p) {
 define void @prop_param_deref_or_null_update(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_update
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @bar1(ptr align 512 dereferenceable_or_null(512) 
[[P]])
+; CHECK-NEXT:    call void @bar1(ptr align 512 dereferenceable_or_null(1024) 
[[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo1_bar_aligned512_deref_or_null512(ptr 
dereferenceable_or_null(1024) %p)
@@ -364,7 +364,7 @@ define void @prop_param_deref_or_null_update(ptr %p) {
 define void @prop_param_deref_or_null_no_update(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_no_update
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @bar1(ptr align 512 dereferenceable_or_null(512) 
[[P]])
+; CHECK-NEXT:    call void @bar1(ptr align 512 dereferenceable_or_null(32) 
[[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo1_bar_aligned512_deref_or_null512(ptr 
dereferenceable_or_null(32) %p)
diff --git a/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll 
b/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll
index 1a219a22019c43..c0943f4aefb8f9 100644
--- a/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll
+++ b/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll
@@ -8,7 +8,7 @@ declare void @h(ptr %p, ptr %q, ptr %z)
 define void @f(ptr %p, ptr %q, ptr %z) {
 ; CHECK-LABEL: define void @f
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[Z:%.*]]) {
-; CHECK-NEXT:    call void @h(ptr [[P]], ptr [[Q]], ptr [[Z]])
+; CHECK-NEXT:    call void @h(ptr nonnull [[P]], ptr [[Q]], ptr nonnull [[Z]])
 ; CHECK-NEXT:    ret void
 ;
   call void @g(ptr nonnull %p, ptr %q, ptr nonnull %z)
diff --git a/llvm/test/Transforms/Inline/byval.ll 
b/llvm/test/Transforms/Inline/byval.ll
index dd5be40b90a8f2..1a70da8472cb1e 100644
--- a/llvm/test/Transforms/Inline/byval.ll
+++ b/llvm/test/Transforms/Inline/byval.ll
@@ -106,7 +106,7 @@ define void @test3() nounwind  {
 ; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS]], align 1
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr 
align 1 [[S]], i64 12, i1 false)
-; CHECK-NEXT:    call void @g3(ptr [[S1]]) #[[ATTR0]]
+; CHECK-NEXT:    call void @g3(ptr align 64 [[S1]]) #[[ATTR0]]
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]])
 ; CHECK-NEXT:    ret void
 ;
@@ -131,7 +131,7 @@ define i32 @test4() nounwind  {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
-; CHECK-NEXT:    call void @g3(ptr [[S]]) #[[ATTR0]]
+; CHECK-NEXT:    call void @g3(ptr align 64 [[S]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i32 4
 ;
 entry:

>From 420da827d1750d6ce10e469d9c36d12b434107df Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein....@gmail.com>
Date: Sat, 4 May 2024 13:57:54 -0500
Subject: [PATCH 3/5] [Inliner] Propagate `range` attributes to params when
 inlining

---
 llvm/include/llvm/IR/Attributes.h             | 12 ++++++++++
 llvm/include/llvm/IR/InstrTypes.h             |  4 ++++
 llvm/lib/IR/Attributes.cpp                    | 22 +++++++++++++++++++
 llvm/lib/IR/Instructions.cpp                  |  7 ++++++
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 14 ++++++++++++
 .../Inline/access-attributes-prop.ll          | 14 ++++++------
 6 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/IR/Attributes.h 
b/llvm/include/llvm/IR/Attributes.h
index dd11955714895e..337254906db885 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -752,6 +752,11 @@ class AttributeList {
   [[nodiscard]] AttributeList addRangeRetAttr(LLVMContext &C,
                                               const ConstantRange &CR) const;
 
+  /// Add the range attribute to the attribute set at the given arg index.
+  /// Returns a new list because attribute lists are immutable.
+  [[nodiscard]] AttributeList addRangeParamAttr(LLVMContext &C, unsigned Index,
+                                                const ConstantRange &CR) const;
+
   /// Add the allocsize attribute to the attribute set at the given arg index.
   /// Returns a new list because attribute lists are immutable.
   [[nodiscard]] AttributeList
@@ -906,6 +911,9 @@ class AttributeList {
   /// arg.
   uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const;
 
+  /// Get range (or std::nullopt if unknown) of an arg.
+  std::optional<ConstantRange> getParamRange(unsigned ArgNo) const;
+
   /// Get the disallowed floating-point classes of the return value.
   FPClassTest getRetNoFPClass() const;
 
@@ -1082,6 +1090,10 @@ class AttrBuilder {
   /// invalid if the Kind is not present in the builder.
   Attribute getAttribute(StringRef Kind) const;
 
+  /// Retrieve the range if the attribute exists (std::nullopt is returned
+  /// otherwise).
+  std::optional<ConstantRange> getRange() const;
+
   /// Return raw (possibly packed/encoded) value of integer attribute or
   /// std::nullopt if not set.
   std::optional<uint64_t> getRawIntAttr(Attribute::AttrKind Kind) const;
diff --git a/llvm/include/llvm/IR/InstrTypes.h 
b/llvm/include/llvm/IR/InstrTypes.h
index b9af3a6ca42c06..87335f0b28c6b4 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -2198,6 +2198,10 @@ class CallBase : public Instruction {
   /// parameter.
   FPClassTest getParamNoFPClass(unsigned i) const;
 
+  /// If arg ArgNo has a range attribute, return the value range of the
+  /// argument. Otherwise, std::nullopt is returned.
+  std::optional<ConstantRange> getParamRange(unsigned ArgNo) const;
+
   /// If this return value has a range attribute, return the value range of the
   /// argument. Otherwise, std::nullopt is returned.
   std::optional<ConstantRange> getRange() const;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index c8d6bdd423878b..0cbfe923032c86 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -1530,6 +1530,13 @@ 
AttributeList::addDereferenceableOrNullParamAttr(LLVMContext &C, unsigned Index,
   return addParamAttributes(C, Index, B);
 }
 
+AttributeList AttributeList::addRangeParamAttr(LLVMContext &C, unsigned Index,
+                                               const ConstantRange &CR) const {
+  AttrBuilder B(C);
+  B.addRangeAttr(CR);
+  return addParamAttributes(C, Index, B);
+}
+
 AttributeList AttributeList::addRangeRetAttr(LLVMContext &C,
                                              const ConstantRange &CR) const {
   AttrBuilder B(C);
@@ -1658,6 +1665,14 @@ 
AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const {
   return getParamAttrs(Index).getDereferenceableOrNullBytes();
 }
 
+std::optional<ConstantRange>
+AttributeList::getParamRange(unsigned Index) const {
+  auto RangeAttr = getParamAttrs(Index).getAttribute(Attribute::Range);
+  if (RangeAttr.isValid())
+    return RangeAttr.getRange();
+  return std::nullopt;
+}
+
 FPClassTest AttributeList::getRetNoFPClass() const {
   return getRetAttrs().getNoFPClass();
 }
@@ -1991,6 +2006,13 @@ Attribute AttrBuilder::getAttribute(StringRef A) const {
   return {};
 }
 
+std::optional<ConstantRange> AttrBuilder::getRange() const {
+  const Attribute RangeAttr = getAttribute(Attribute::Range);
+  if (RangeAttr.isValid())
+    return RangeAttr.getRange();
+  return std::nullopt;
+}
+
 bool AttrBuilder::contains(Attribute::AttrKind A) const {
   return getAttribute(A).isValid();
 }
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 7ad1ad4cddb703..ee832d2093a132 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -396,6 +396,13 @@ FPClassTest CallBase::getParamNoFPClass(unsigned i) const {
   return Mask;
 }
 
+std::optional<ConstantRange> CallBase::getParamRange(unsigned ArgNo) const {
+  const Attribute RangeAttr = getParamAttr(ArgNo, llvm::Attribute::Range);
+  if (RangeAttr.isValid())
+    return RangeAttr.getRange();
+  return std::nullopt;
+}
+
 std::optional<ConstantRange> CallBase::getRange() const {
   const Attribute RangeAttr = getRetAttr(llvm::Attribute::Range);
   if (RangeAttr.isValid())
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp 
b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 45bccd0a041509..41f899fe120f63 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1384,6 +1384,8 @@ static void AddParamAndFnBasicAttributes(const CallBase 
&CB,
       ValidExactParamAttrs.back().addAttribute(Attribute::NonNull);
     if (auto Align = CB.getParamAlign(I))
       ValidExactParamAttrs.back().addAlignmentAttr(Align);
+    if (auto Range = CB.getParamRange(I))
+      ValidExactParamAttrs.back().addRangeAttr(*Range);
 
     HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
     HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
@@ -1427,8 +1429,20 @@ static void AddParamAndFnBasicAttributes(const CallBase 
&CB,
               ValidExactParamAttrs[ArgNo].getAlignment().valueOrOne())
             AL = AL.removeParamAttribute(Context, I, Attribute::Alignment);
 
+          auto ExistingRange = AL.getParamRange(I);
           AL = AL.addParamAttributes(Context, I, ValidExactParamAttrs[ArgNo]);
 
+          // For range we use the exact intersection.
+          if (ExistingRange.has_value()) {
+            if (auto NewRange = ValidExactParamAttrs[ArgNo].getRange()) {
+              auto CombinedRange = 
ExistingRange->exactIntersectWith(*NewRange);
+              if (!CombinedRange.has_value())
+                CombinedRange =
+                    ConstantRange::getEmpty(NewRange->getBitWidth());
+              AL = AL.removeParamAttribute(Context, I, Attribute::Range);
+              AL = AL.addRangeParamAttr(Context, I, *CombinedRange);
+            }
+          }
         } else {
           const Value *UnderlyingV =
               getUnderlyingObject(InnerCB->getArgOperand(I));
diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll 
b/llvm/test/Transforms/Inline/access-attributes-prop.ll
index f3c656be00f59b..e25023da6ed5ff 100644
--- a/llvm/test/Transforms/Inline/access-attributes-prop.ll
+++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll
@@ -585,7 +585,7 @@ define dso_local void @foo4(i32 %v) {
 define void @prop_range_empty_intersect(i32 %v) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_intersect
 ; CHECK-SAME: (i32 [[V:%.*]]) {
-; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 0) [[V]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo4_range_0_10(i32 range(i32 11, 50) %v)
@@ -595,7 +595,7 @@ define void @prop_range_empty_intersect(i32 %v) {
 define void @prop_range_empty(i32 %v) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_range_empty
 ; CHECK-SAME: (i32 [[V:%.*]]) {
-; CHECK-NEXT:    call void @bar4(i32 [[V]])
+; CHECK-NEXT:    call void @bar4(i32 range(i32 1, 0) [[V]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo4(i32 range(i32 1, 0) %v)
@@ -605,7 +605,7 @@ define void @prop_range_empty(i32 %v) {
 define void @prop_range_empty_with_intersect(i32 %v) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_with_intersect
 ; CHECK-SAME: (i32 [[V:%.*]]) {
-; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    call void @bar4(i32 range(i32 1, 10) [[V]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo4_range_0_10(i32 range(i32 1, 0) %v)
@@ -615,7 +615,7 @@ define void @prop_range_empty_with_intersect(i32 %v) {
 define void @prop_range_intersect1(i32 %v) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect1
 ; CHECK-SAME: (i32 [[V:%.*]]) {
-; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 9) [[V]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo4_range_0_10(i32 range(i32 0, 9) %v)
@@ -625,7 +625,7 @@ define void @prop_range_intersect1(i32 %v) {
 define void @prop_range_intersect2(i32 %v) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect2
 ; CHECK-SAME: (i32 [[V:%.*]]) {
-; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 10) [[V]])
+; CHECK-NEXT:    call void @bar4(i32 range(i32 1, 9) [[V]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo4_range_0_10(i32 range(i32 1, 9) %v)
@@ -635,7 +635,7 @@ define void @prop_range_intersect2(i32 %v) {
 define void @prop_range_intersect3(i32 %v) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect3
 ; CHECK-SAME: (i32 [[V:%.*]]) {
-; CHECK-NEXT:    call void @bar4(i32 [[V]])
+; CHECK-NEXT:    call void @bar4(i32 range(i32 0, 11) [[V]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo4_2_range_0_10(i32 range(i32 0, 11) %v)
@@ -645,7 +645,7 @@ define void @prop_range_intersect3(i32 %v) {
 define void @prop_range_direct(i32 %v) {
 ; CHECK-LABEL: define {{[^@]+}}@prop_range_direct
 ; CHECK-SAME: (i32 [[V:%.*]]) {
-; CHECK-NEXT:    call void @bar4(i32 [[V]])
+; CHECK-NEXT:    call void @bar4(i32 range(i32 1, 11) [[V]])
 ; CHECK-NEXT:    ret void
 ;
   call void @foo4(i32 range(i32 1, 11) %v)

>From 62a190bfac3c59feb9a97bb606e245fce23f3d08 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein....@gmail.com>
Date: Tue, 9 Apr 2024 14:36:08 -0500
Subject: [PATCH 4/5] [InstCombine] Add tests for folding `(icmp eq/ne (or
 (select cond, 0/NZ, 0/NZ), X), 0)`; NFC

---
 .../icmp-or-of-select-with-zero.ll            | 247 ++++++++++++++++++
 1 file changed, 247 insertions(+)
 create mode 100644 
llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll

diff --git a/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll 
b/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll
new file mode 100644
index 00000000000000..45537c2f2fbfee
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare void @use.i8(i8)
+declare void @use.i1(i1)
+define i1 @src_tv_eq(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_tv_eq(
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 0, i8 %y
+  %selx = or i8 %sel, %x
+  %r = icmp eq i8 %selx, 0
+  ret i1 %r
+}
+
+define i1 @src_tv_eq_multiuse_or_fail(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_tv_eq_multiuse_or_fail(
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    call void @use.i8(i8 [[SELX]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 0, i8 %y
+  %selx = or i8 %sel, %x
+  %r = icmp eq i8 %selx, 0
+  call void @use.i8(i8 %selx)
+  ret i1 %r
+}
+
+define i1 @src_tv_eq_fail_tv_nonzero(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_tv_eq_fail_tv_nonzero(
+; CHECK-NEXT:    [[Y:%.*]] = add nsw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 1, i8 [[Y]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nsw i8 %yy, 1
+  %sel = select i1 %c0, i8 1, i8 %y
+  %selx = or i8 %sel, %x
+  %r = icmp eq i8 %selx, 0
+  ret i1 %r
+}
+
+define i1 @src_fv_ne(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_fv_ne(
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 %y, i8 0
+  %selx = or i8 %sel, %x
+  %r = icmp ne i8 %selx, 0
+  ret i1 %r
+}
+
+define i1 @src_fv_ne_fail_maybe_zero(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_fv_ne_fail_maybe_zero(
+; CHECK-NEXT:    [[Y:%.*]] = add nsw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nsw i8 %yy, 1
+  %sel = select i1 %c0, i8 %y, i8 0
+  %selx = or i8 %sel, %x
+  %r = icmp ne i8 %selx, 0
+  ret i1 %r
+}
+
+define i1 @src_tv_ne(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_tv_ne(
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 0, i8 %y
+  %selx = or i8 %sel, %x
+  %r = icmp ne i8 %selx, 0
+  ret i1 %r
+}
+
+define i1 @src_tv_ne_fail_cmp_nonzero(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_tv_ne_fail_cmp_nonzero(
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX]], 1
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 0, i8 %y
+  %selx = or i8 %sel, %x
+  %r = icmp ne i8 %selx, 1
+  ret i1 %r
+}
+
+define i1 @src_fv_eq(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_fv_eq(
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 %y, i8 0
+  %selx = or i8 %sel, %x
+  %r = icmp eq i8 %selx, 0
+  ret i1 %r
+}
+
+define i1 @src_fv_eq_fail_cant_invert(i1 %c0, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_fv_eq_fail_cant_invert(
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    call void @use.i8(i8 [[SEL]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 %y, i8 0
+  %selx = or i8 %sel, %x
+  %r = icmp eq i8 %selx, 0
+  call void @use.i8(i8 %sel)
+  ret i1 %r
+}
+
+define i1 @src_fv_eq_fail_cant_invert2(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_fv_eq_fail_cant_invert2(
+; CHECK-NEXT:    [[C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0]], i8 [[Y]], i8 0
+; CHECK-NEXT:    [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]]
+; CHECK-NEXT:    [[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    call void @use.i8(i8 [[SEL]])
+; CHECK-NEXT:    call void @use.i8(i8 [[SEL_OTHER]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %c0 = icmp ugt i8 %a, %b
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 %y, i8 0
+  %cc = or i1 %c0, %c1
+  %sel_other = select i1 %cc, i8 %y, i8 %b
+
+  %selx = or i8 %sel, %x
+  %r = icmp eq i8 %selx, 0
+  call void @use.i8(i8 %sel)
+  call void @use.i8(i8 %sel_other)
+  ret i1 %r
+}
+
+define i1 @src_fv_eq_invert2(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_fv_eq_invert2(
+; CHECK-NEXT:    [[C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0]], i8 [[Y]], i8 0
+; CHECK-NEXT:    [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]]
+; CHECK-NEXT:    [[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    call void @use.i8(i8 [[SEL_OTHER]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %c0 = icmp ugt i8 %a, %b
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 %y, i8 0
+  %cc = or i1 %c0, %c1
+  %sel_other = select i1 %cc, i8 %y, i8 %b
+
+  %selx = or i8 %sel, %x
+  %r = icmp eq i8 %selx, 0
+  call void @use.i8(i8 %sel_other)
+  ret i1 %r
+}
+
+define i1 @src_fv_eq_invert3(i8 %a, i8 %b, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_fv_eq_invert3(
+; CHECK-NEXT:    [[C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0]], i8 [[Y]], i8 0
+; CHECK-NEXT:    [[SEL_OTHER:%.*]] = select i1 [[C0]], i8 [[Y]], i8 [[B]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    call void @use.i8(i8 [[SEL_OTHER]])
+; CHECK-NEXT:    call void @use.i8(i8 [[SEL]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %c0 = icmp ugt i8 %a, %b
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 %y, i8 0
+  %sel_other = select i1 %c0, i8 %y, i8 %b
+
+  %selx = or i8 %sel, %x
+  %r = icmp eq i8 %selx, 0
+  call void @use.i8(i8 %sel_other)
+  call void @use.i8(i8 %sel)
+  ret i1 %r
+}
+
+define i1 @src_tv_ne_invert(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_tv_ne_invert(
+; CHECK-NEXT:    [[NOT_C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    call void @use.i1(i1 [[NOT_C0]])
+; CHECK-NEXT:    [[C0:%.*]] = xor i1 [[NOT_C0]], true
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[NOT_C0]], i8 [[Y]], i8 0
+; CHECK-NEXT:    [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]]
+; CHECK-NEXT:    [[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]]
+; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX]], 0
+; CHECK-NEXT:    call void @use.i8(i8 [[SEL]])
+; CHECK-NEXT:    call void @use.i8(i8 [[SEL_OTHER]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %not_c0 = icmp ugt i8 %a, %b
+  call void @use.i1(i1 %not_c0)
+  %c0 = xor i1 %not_c0, true
+  %y = add nuw i8 %yy, 1
+  %sel = select i1 %c0, i8 0, i8 %y
+  %cc = or i1 %c0, %c1
+  %sel_other = select i1 %cc, i8 %y, i8 %b
+
+  %selx = or i8 %sel, %x
+  %r = icmp ne i8 %selx, 0
+  call void @use.i8(i8 %sel)
+  call void @use.i8(i8 %sel_other)
+  ret i1 %r
+}

>From 6c1c79071149cb0feeb266f7673b0f79cbff5048 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein....@gmail.com>
Date: Tue, 9 Apr 2024 14:36:16 -0500
Subject: [PATCH 5/5] [InstCombine] Fold `(icmp eq/ne (or (select cond, 0/NZ,
 0/NZ), X), 0)`

Four cases:
`(icmp eq (or (select cond, 0, NonZero), Other), 0)`
 -> `(and cond, (icmp eq Other, 0))`
`(icmp ne (or (select cond, NonZero, 0), Other), 0)`
 -> `(or cond, (icmp ne Other, 0))`
`(icmp ne (or (select cond, 0, NonZero), Other), 0)`
 -> `(or (not cond), (icmp ne Other, 0))`
`(icmp eq (or (select cond, NonZero, 0), Other), 0)`
 -> `(and (not cond), (icmp eq Other, 0))`

These cases came up in tests on: #88088

Proofs: https://alive2.llvm.org/ce/z/ojGo_J
---
 .../InstCombine/InstCombineCompares.cpp       | 50 +++++++++++++++++++
 .../icmp-or-of-select-with-zero.ll            | 48 ++++++++----------
 2 files changed, 71 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c60a290ce72e06..b1bf7cdd51f090 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3483,6 +3483,56 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
       Value *And = Builder.CreateAnd(BOp0, NotBOC);
       return new ICmpInst(Pred, And, NotBOC);
     }
+    // (icmp eq (or (select cond, 0, NonZero), Other), 0)
+    //  -> (and cond, (icmp eq Other, 0))
+    // (icmp ne (or (select cond, NonZero, 0), Other), 0)
+    //  -> (or cond, (icmp ne Other, 0))
+    // (icmp ne (or (select cond, 0, NonZero), Other), 0)
+    //  -> (or (not cond), (icmp ne Other, 0))
+    // (icmp eq (or (select cond, NonZero, 0), Other), 0)
+    //  -> (and (not cond), (icmp eq Other, 0))
+    Value *Cond, *TV, *FV, *Other;
+    if (C.isZero() && BO->hasOneUse() &&
+        match(BO, m_c_Or(m_Select(m_Value(Cond), m_Value(TV), m_Value(FV)),
+                         m_Value(Other)))) {
+      const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
+      // Easy case is if eq/ne matches whether 0 is trueval/falseval.
+      if (Pred == ICmpInst::ICMP_EQ
+              ? (match(TV, m_SpecificInt(C)) && isKnownNonZero(FV, Q))
+              : (match(FV, m_SpecificInt(C)) && isKnownNonZero(TV, Q))) {
+        Value *Cmp = Builder.CreateICmp(
+            Pred, Other, Constant::getNullValue(Other->getType()));
+        return BinaryOperator::Create(
+            Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or, Cmp,
+            Cond);
+      }
+      // Harder case is if eq/ne matches whether 0 is falseval/trueval. In this
+      // case we need to invert the select condition so we need to be careful to
+      // avoid creating extra instructions.
+      if (Pred == ICmpInst::ICMP_EQ
+              ? (match(FV, m_SpecificInt(C)) && isKnownNonZero(TV, Q))
+              : (match(TV, m_SpecificInt(C)) && isKnownNonZero(FV, Q))) {
+        Value *NotCond = nullptr;
+        // If the select is one use, we are essentially replacing select with
+        // `(not Cond)`.
+        if (match(BO, m_c_Or(m_OneUse(m_Select(m_Specific(Cond), m_Specific(TV),
+                                               m_Specific(FV))),
+                             m_Value())))
+          NotCond = Builder.CreateNot(Cond);
+        // Otherwise, see if we can get NotCond for free.
+        else
+          NotCond =
+              getFreelyInverted(Cond, /*WillInvertAllUses=*/false, &Builder);
+
+        if (NotCond) {
+          Value *Cmp = Builder.CreateICmp(
+              Pred, Other, Constant::getNullValue(Other->getType()));
+          return BinaryOperator::Create(
+              Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or,
+              Cmp, NotCond);
+        }
+      }
+    }
     break;
   }
   case Instruction::UDiv:
diff --git a/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll b/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll
index 45537c2f2fbfee..0742066d693560 100644
--- a/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll
@@ -5,11 +5,9 @@ declare void @use.i8(i8)
 declare void @use.i1(i1)
 define i1 @src_tv_eq(i1 %c0, i8 %x, i8 %yy) {
 ; CHECK-LABEL: @src_tv_eq(
-; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]]
-; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX:%.*]], 0
+; CHECK-NEXT:    [[R1:%.*]] = and i1 [[R]], [[C0:%.*]]
+; CHECK-NEXT:    ret i1 [[R1]]
 ;
   %y = add nuw i8 %yy, 1
   %sel = select i1 %c0, i8 0, i8 %y
@@ -52,11 +50,9 @@ define i1 @src_tv_eq_fail_tv_nonzero(i1 %c0, i8 %x, i8 %yy) {
 
 define i1 @src_fv_ne(i1 %c0, i8 %x, i8 %yy) {
 ; CHECK-LABEL: @src_fv_ne(
-; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0
-; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX]], 0
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX:%.*]], 0
+; CHECK-NEXT:    [[R1:%.*]] = or i1 [[R]], [[C0:%.*]]
+; CHECK-NEXT:    ret i1 [[R1]]
 ;
   %y = add nuw i8 %yy, 1
   %sel = select i1 %c0, i8 %y, i8 0
@@ -82,11 +78,10 @@ define i1 @src_fv_ne_fail_maybe_zero(i1 %c0, i8 %x, i8 %yy) {
 
 define i1 @src_tv_ne(i1 %c0, i8 %x, i8 %yy) {
 ; CHECK-LABEL: @src_tv_ne(
-; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]]
-; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX]], 0
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[C0:%.*]], true
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX:%.*]], 0
+; CHECK-NEXT:    [[R1:%.*]] = or i1 [[R]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[R1]]
 ;
   %y = add nuw i8 %yy, 1
   %sel = select i1 %c0, i8 0, i8 %y
@@ -112,11 +107,10 @@ define i1 @src_tv_ne_fail_cmp_nonzero(i1 %c0, i8 %x, i8 %yy) {
 
 define i1 @src_fv_eq(i1 %c0, i8 %x, i8 %yy) {
 ; CHECK-LABEL: @src_fv_eq(
-; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0
-; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[C0:%.*]], true
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX:%.*]], 0
+; CHECK-NEXT:    [[R1:%.*]] = and i1 [[R]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[R1]]
 ;
   %y = add nuw i8 %yy, 1
   %sel = select i1 %c0, i8 %y, i8 0
@@ -172,13 +166,13 @@ define i1 @src_fv_eq_invert2(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) {
 ; CHECK-LABEL: @src_fv_eq_invert2(
 ; CHECK-NEXT:    [[C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C0]], i8 [[Y]], i8 0
 ; CHECK-NEXT:    [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]]
 ; CHECK-NEXT:    [[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]]
-; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[C0]], true
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[SELX:%.*]], 0
+; CHECK-NEXT:    [[R1:%.*]] = and i1 [[R]], [[TMP1]]
 ; CHECK-NEXT:    call void @use.i8(i8 [[SEL_OTHER]])
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 [[R1]]
 ;
   %c0 = icmp ugt i8 %a, %b
   %y = add nuw i8 %yy, 1
@@ -225,11 +219,11 @@ define i1 @src_tv_ne_invert(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) {
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[NOT_C0]], i8 [[Y]], i8 0
 ; CHECK-NEXT:    [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]]
 ; CHECK-NEXT:    [[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]]
-; CHECK-NEXT:    [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX]], 0
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[SELX:%.*]], 0
+; CHECK-NEXT:    [[R1:%.*]] = or i1 [[R]], [[NOT_C0]]
 ; CHECK-NEXT:    call void @use.i8(i8 [[SEL]])
 ; CHECK-NEXT:    call void @use.i8(i8 [[SEL_OTHER]])
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 [[R1]]
 ;
   %not_c0 = icmp ugt i8 %a, %b
   call void @use.i1(i1 %not_c0)

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to