[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/97050

>From 5672042d638e13794e09d981f286fef487b05206 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 27 Jun 2024 16:32:48 +0200
Subject: [PATCH] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics

These are now fully covered by atomicrmw.
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |   4 -
 llvm/lib/IR/AutoUpgrade.cpp   |  14 +-
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |   2 -
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   2 -
 .../Target/AMDGPU/AMDGPUSearchableTables.td   |   2 -
 llvm/lib/Target/AMDGPU/FLATInstructions.td|   2 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |   6 +-
 llvm/test/Bitcode/amdgcn-atomic.ll|  22 ++
 .../AMDGPU/GlobalISel/fp-atomics-gfx940.ll| 106 -
 .../test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll | 218 --
 llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll | 193 
 11 files changed, 33 insertions(+), 538 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8c25467cc5e4b..e24571d8b184c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2990,10 +2990,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic {
 def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic;
 }
 
-// bf16 atomics use v2i16 argument since there is no bf16 data type in the 
llvm.
-def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn;
-def int_amdgcn_flat_atomic_fadd_v2bf16   : AMDGPUAtomicRtn;
-
 defset list AMDGPUMFMAIntrinsics940 = {
 def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic;
 def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 53de9eef516b3..f566a0e3c3043 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1034,7 +1034,9 @@ static bool upgradeIntrinsicFunction1(Function *F, 
Function *,
   }
 
   if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
-  Name.starts_with("ds.fmax")) {
+  Name.starts_with("ds.fmax") ||
+  Name.starts_with("global.atomic.fadd.v2bf16") ||
+  Name.starts_with("flat.atomic.fadd.v2bf16")) {
 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
 // declaration.
 NewFn = nullptr;
@@ -4042,7 +4044,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, 
CallBase *CI,
   .StartsWith("ds.fmin", AtomicRMWInst::FMin)
   .StartsWith("ds.fmax", AtomicRMWInst::FMax)
   .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
-  .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
+  .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
+  .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
+  .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd);
 
   unsigned NumOperands = CI->getNumOperands();
   if (NumOperands < 3) // Malformed bitcode.
@@ -4097,8 +4101,10 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, 
CallBase *CI,
   Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
 
   if (PtrTy->getAddressSpace() != 3) {
-RMW->setMetadata("amdgpu.no.fine.grained.memory",
- MDNode::get(F->getContext(), {}));
+MDNode *EmptyMD = MDNode::get(F->getContext(), {});
+RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
+if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
+  RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
   }
 
   if (IsVolatile)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index c6dbc58395e48..db8b44149cf47 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -620,12 +620,10 @@ multiclass local_addr_space_atomic_op {
 
 defm int_amdgcn_flat_atomic_fadd : noret_op;
 defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
-defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_flat_atomic_fmin : noret_op;
 defm int_amdgcn_flat_atomic_fmax : noret_op;
 defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
 defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
-defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_global_atomic_fmin : noret_op;
 defm int_amdgcn_global_atomic_fmax : noret_op;
 defm int_amdgcn_global_atomic_csub : noret_op;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 9a6ba5ac68084..5e4f9f4365be0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4896,8 +4896,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const 
MachineInstr ) const {
 case Intrinsic::amdgcn_flat_atomic_fmax:
 case 

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins (PR #96876)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96876

>From 55fc7946a4480b2dd1befd579805623a56f5fd1a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 23:18:32 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max
 f64 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 36 +--
 .../builtins-fp-atomics-gfx90a.cl | 18 ++
 2 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3afe3135d99d6..b9f2c0f510b1b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18777,32 +18777,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
-Intrinsic::ID IID;
-llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
-switch (BuiltinID) {
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
-  IID = Intrinsic::amdgcn_global_atomic_fmin;
-  break;
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-  IID = Intrinsic::amdgcn_global_atomic_fmax;
-  break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-  IID = Intrinsic::amdgcn_flat_atomic_fmin;
-  break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
-  IID = Intrinsic::amdgcn_flat_atomic_fmax;
-  break;
-}
-llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-llvm::Function *F =
-CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
-return Builder.CreateCall(F, {Addr, Val});
-  }
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19186,7 +19160,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19213,8 +19191,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
   BinOp = llvm::AtomicRMWInst::FMin;
   break;
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
   BinOp = llvm::AtomicRMWInst::FMax;
   break;
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index 9381ce951df3e..556e553903d1a 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) {
 }
 
 // CHECK-LABEL: test_global_global_min_f64
-// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr 
addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} 
syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
+
 // GFX90A-LABEL:  test_global_global_min_f64$local
 // GFX90A:  global_atomic_min_f64
 void test_global_global_min_f64(__global double *addr, double x){
@@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double 
x){
 }
 
 // CHECK-LABEL: test_global_max_f64
-// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr 
addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} 
syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
+
 // GFX90A-LABEL:  test_global_max_f64$local
 // GFX90A:  global_atomic_max_f64
 void test_global_max_f64(__global double *addr, double x){
@@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr, 

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins (PR #96875)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96875

>From 8ac629544dcf9fa4c35310abb89491b77e3292ba Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 19:34:43 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16
 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 26 ++-
 .../builtins-fp-atomics-gfx12.cl  | 24 -
 .../builtins-fp-atomics-gfx90a.cl |  6 ++---
 .../builtins-fp-atomics-gfx940.cl | 14 +++---
 4 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c19a80921beaf..3afe3135d99d6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18803,22 +18803,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
 return Builder.CreateCall(F, {Addr, Val});
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
-Intrinsic::ID IID;
-switch (BuiltinID) {
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
-  IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
-  break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
-  IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
-  break;
-}
-llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
-return Builder.CreateCall(F, {Addr, Val});
-  }
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19200,7 +19184,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19222,6 +19208,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
@@ -19266,7 +19254,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   AO = AtomicOrdering::Monotonic;
 
   // The v2bf16 builtin uses i16 instead of a natural bfloat type.
-  if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
+  if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
+  BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
+  BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
 llvm::Type *V2BF16Ty = FixedVectorType::get(
 llvm::Type::getBFloatTy(Builder.getContext()), 2);
 Val = Builder.CreateBitCast(Val, V2BF16Ty);
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
index 07e63a8711c7f..e8b6eb57c38d7 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
@@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2;
 
 // CHECK-LABEL: test_local_add_2bf16
 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
-// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> 
[[BC0]] syncscope("agent") monotonic, align 4
+// CHECK-NEXT: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x 
bfloat> [[BC0]] syncscope("agent") monotonic, align 4
 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
 
 // GFX12-LABEL:  test_local_add_2bf16
@@ -48,7 +48,7 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
 }
 
 // CHECK-LABEL: test_flat_add_2f16
-// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} 
syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
 
 // GFX12-LABEL:  

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96874

>From 2c443d8a9daeb42234e585d0d9547634409952a9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 19:15:26 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64}
 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp | 17 ++---
 .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl |  6 --
 .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl |  3 ++-
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ef4bd9fb4af09..c19a80921beaf 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18779,10 +18779,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   }
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
 Intrinsic::ID IID;
 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
 switch (BuiltinID) {
@@ -18792,19 +18790,12 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
   IID = Intrinsic::amdgcn_global_atomic_fmax;
   break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
-  IID = Intrinsic::amdgcn_flat_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
   IID = Intrinsic::amdgcn_flat_atomic_fmin;
   break;
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
   IID = Intrinsic::amdgcn_flat_atomic_fmax;
   break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
-  ArgTy = llvm::Type::getFloatTy(getLLVMContext());
-  IID = Intrinsic::amdgcn_flat_atomic_fadd;
-  break;
 }
 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
@@ -19207,7 +19198,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19227,6 +19220,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index cd10777dbe079..02e289427238f 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_add_local_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr 
addrspace(3) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} 
syncscope("agent") seq_cst, align 8{{$}}
+
 // GFX90A-LABEL:  test_flat_add_local_f64$local
 // GFX90A:  ds_add_rtn_f64
 void test_flat_add_local_f64(__local double *addr, double x){
@@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_global_add_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr 
addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} 
syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX90A-LABEL:  test_flat_global_add_f64$local
 // GFX90A:  global_atomic_add_f64
 void test_flat_global_add_f64(__global double *addr, double x){
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
index 589dcd406630d..bd9b8c7268e06 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
@@ -10,7 +10,8 @@ typedef half  

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96873

>From 367f6897698f22c30cb7491d90ae0251bfa57af1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 19:12:59 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from
 {global|flat}_atomic_fadd_v2f16 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 20 ++-
 .../builtins-fp-atomics-gfx12.cl  |  9 ++---
 .../builtins-fp-atomics-gfx90a.cl |  2 +-
 .../builtins-fp-atomics-gfx940.cl |  3 ++-
 4 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ad4cce77221a6..ef4bd9fb4af09 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18777,22 +18777,15 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: {
 Intrinsic::ID IID;
 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
 switch (BuiltinID) {
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
-  ArgTy = llvm::FixedVectorType::get(
-  llvm::Type::getHalfTy(getLLVMContext()), 2);
-  IID = Intrinsic::amdgcn_global_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   IID = Intrinsic::amdgcn_global_atomic_fmin;
   break;
@@ -18812,11 +18805,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   ArgTy = llvm::Type::getFloatTy(getLLVMContext());
   IID = Intrinsic::amdgcn_flat_atomic_fadd;
   break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
-  ArgTy = llvm::FixedVectorType::get(
-  llvm::Type::getHalfTy(getLLVMContext()), 2);
-  IID = Intrinsic::amdgcn_flat_atomic_fadd;
-  break;
 }
 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
@@ -19217,7 +19205,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_ds_fminf:
   case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: {
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19235,6 +19225,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
index 6b8a6d14575db..07e63a8711c7f 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
@@ -48,7 +48,8 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
 }
 
 // CHECK-LABEL: test_flat_add_2f16
-// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr 
%{{.*}}, <2 x half> %{{.*}})
+// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX12-LABEL:  test_flat_add_2f16
 // GFX12: flat_atomic_pk_add_f16
 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {
@@ -64,7 +65,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) {
 }
 
 // CHECK-LABEL: test_global_add_half2
-// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr 
addrspace(1) %{{.*}}, <2 x half> %{{.*}})
+// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> 
%{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
+
 // GFX12-LABEL:  test_global_add_half2
 // GFX12:  global_atomic_pk_add_f16 v2, v[0:1], v2, 

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (PR #96872)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96872

>From ea17c792053e32e39a7261e3bdf1673d98e4d94a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 11 Jun 2024 10:58:44 +0200
Subject: [PATCH 1/2] clang/AMDGPU: Emit atomicrmw for
 __builtin_amdgcn_global_atomic_fadd_{f32|f64}

Need to emit syncscope and new metadata to get the native instruction,
most of the time.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 39 +--
 .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl|  2 +-
 .../builtins-fp-atomics-gfx12.cl  |  4 +-
 .../builtins-fp-atomics-gfx90a.cl |  4 +-
 .../builtins-fp-atomics-gfx940.cl |  4 +-
 5 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0c2ee446aa303..02f85f340893d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -58,6 +58,7 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ScopedPrinter.h"
@@ -18776,8 +18777,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
@@ -18789,18 +18788,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 Intrinsic::ID IID;
 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
 switch (BuiltinID) {
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
-  ArgTy = llvm::Type::getFloatTy(getLLVMContext());
-  IID = Intrinsic::amdgcn_global_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
   ArgTy = llvm::FixedVectorType::get(
   llvm::Type::getHalfTy(getLLVMContext()), 2);
   IID = Intrinsic::amdgcn_global_atomic_fadd;
   break;
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
-  IID = Intrinsic::amdgcn_global_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   IID = Intrinsic::amdgcn_global_atomic_fmin;
   break;
@@ -19223,7 +19215,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
   case AMDGPU::BI__builtin_amdgcn_ds_faddf:
   case AMDGPU::BI__builtin_amdgcn_ds_fminf:
-  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
+  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19239,6 +19233,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
@@ -19273,8 +19269,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
   EmitScalarExpr(E->getArg(3)), AO, SSID);
 } else {
-  // The ds_atomic_fadd_* builtins do not have syncscope/order arguments.
-  SSID = llvm::SyncScope::System;
+  // Most of the builtins do not have syncscope/order arguments. For DS
+  // atomics the scope doesn't really matter, as they implicitly operate at
+  // workgroup scope.
+  //
+  // The global/flat cases need to use agent scope to consistently produce
+  // the native instruction instead of a cmpxchg expansion.
+  SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
   AO = AtomicOrdering::SequentiallyConsistent;
 
   // The v2bf16 builtin uses i16 instead of a natural bfloat type.
@@ -19289,6 +19290,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
 if (Volatile)
   RMW->setVolatile(true);
+
+unsigned AddrSpace = Ptr.getType()->getAddressSpace();
+if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
+  // Most targets require "amdgpu.no.fine.grained.memory" to emit the 

[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits


@@ -75,6 +75,11 @@ Changes to the AArch64 Backend
 Changes to the AMDGPU Backend
 -
 
+* Removed ``llvm.amdgcn.flat.atomic.fadd`` and
+  ``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the
+  :ref:`atomicrmw <i_atomicrmw>` instruction with `fadd` and

arsenm wrote:

This refers to i_atomicrmw? The documentation bot passes.

https://github.com/llvm/llvm-project/pull/97051
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits


@@ -1017,29 +1015,6 @@ main_body:
   ret void
 }
 
-define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, 
double %data) {

arsenm wrote:

That depends on whether they are redundant. Some cases already tested atomicrmw and
had the intrinsic alongside it. We still have a lot of redundancy spread across
multiple files.

https://github.com/llvm/llvm-project/pull/97051
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits


@@ -322,4 +322,36 @@ define <2 x i16> 
@upgrade_amdgcn_global_atomic_fadd_v2bf16_p1(ptr addrspace(1) %
   ret <2 x i16> %result
 }
 
+declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr nocapture, 
<2 x half>) #0

arsenm wrote:

Yes, but also no. These tests should use llvm-as/llvm-dis instead of opt, and
the update scripts don't understand that.

https://github.com/llvm/llvm-project/pull/97051
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96874

>From c8a9e8de2d0faf678ab8d67c85c4efd8312d5d10 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 19:15:26 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64}
 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp | 17 ++---
 .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl |  6 --
 .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl |  3 ++-
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ef4bd9fb4af09..c19a80921beaf 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18779,10 +18779,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   }
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
 Intrinsic::ID IID;
 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
 switch (BuiltinID) {
@@ -18792,19 +18790,12 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
   IID = Intrinsic::amdgcn_global_atomic_fmax;
   break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
-  IID = Intrinsic::amdgcn_flat_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
   IID = Intrinsic::amdgcn_flat_atomic_fmin;
   break;
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
   IID = Intrinsic::amdgcn_flat_atomic_fmax;
   break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
-  ArgTy = llvm::Type::getFloatTy(getLLVMContext());
-  IID = Intrinsic::amdgcn_flat_atomic_fadd;
-  break;
 }
 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
@@ -19207,7 +19198,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19227,6 +19220,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index cd10777dbe079..02e289427238f 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_add_local_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr 
addrspace(3) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} 
syncscope("agent") seq_cst, align 8{{$}}
+
 // GFX90A-LABEL:  test_flat_add_local_f64$local
 // GFX90A:  ds_add_rtn_f64
 void test_flat_add_local_f64(__local double *addr, double x){
@@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_global_add_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr 
addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} 
syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX90A-LABEL:  test_flat_global_add_f64$local
 // GFX90A:  global_atomic_add_f64
 void test_flat_global_add_f64(__global double *addr, double x){
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
index 589dcd406630d..bd9b8c7268e06 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
@@ -10,7 +10,8 @@ typedef half  

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96873

>From 7305c0477711f7b26e4ebad3cca0afa33e1defa9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 19:12:59 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from
 {global|flat}_atomic_fadd_v2f16 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 20 ++-
 .../builtins-fp-atomics-gfx12.cl  |  9 ++---
 .../builtins-fp-atomics-gfx90a.cl |  2 +-
 .../builtins-fp-atomics-gfx940.cl |  3 ++-
 4 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ad4cce77221a6..ef4bd9fb4af09 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18777,22 +18777,15 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: {
 Intrinsic::ID IID;
 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
 switch (BuiltinID) {
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
-  ArgTy = llvm::FixedVectorType::get(
-  llvm::Type::getHalfTy(getLLVMContext()), 2);
-  IID = Intrinsic::amdgcn_global_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   IID = Intrinsic::amdgcn_global_atomic_fmin;
   break;
@@ -18812,11 +18805,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   ArgTy = llvm::Type::getFloatTy(getLLVMContext());
   IID = Intrinsic::amdgcn_flat_atomic_fadd;
   break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
-  ArgTy = llvm::FixedVectorType::get(
-  llvm::Type::getHalfTy(getLLVMContext()), 2);
-  IID = Intrinsic::amdgcn_flat_atomic_fadd;
-  break;
 }
 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
@@ -19217,7 +19205,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_ds_fminf:
   case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: {
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19235,6 +19225,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
index 6b8a6d14575db..07e63a8711c7f 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
@@ -48,7 +48,8 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
 }
 
 // CHECK-LABEL: test_flat_add_2f16
-// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr 
%{{.*}}, <2 x half> %{{.*}})
+// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX12-LABEL:  test_flat_add_2f16
 // GFX12: flat_atomic_pk_add_f16
 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {
@@ -64,7 +65,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) {
 }
 
 // CHECK-LABEL: test_global_add_half2
-// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr 
addrspace(1) %{{.*}}, <2 x half> %{{.*}})
+// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> 
%{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
+
 // GFX12-LABEL:  test_global_add_half2
 // GFX12:  global_atomic_pk_add_f16 v2, v[0:1], v2, 

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (PR #96872)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96872

>From 2e27b153cf40498f64ef9f13b69e80804c45a6a4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 11 Jun 2024 10:58:44 +0200
Subject: [PATCH 1/2] clang/AMDGPU: Emit atomicrmw for
 __builtin_amdgcn_global_atomic_fadd_{f32|f64}

The builtins need to emit a syncscope and the new metadata in order to get the
native instruction in most cases.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 39 +--
 .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl|  2 +-
 .../builtins-fp-atomics-gfx12.cl  |  4 +-
 .../builtins-fp-atomics-gfx90a.cl |  4 +-
 .../builtins-fp-atomics-gfx940.cl |  4 +-
 5 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0c2ee446aa303..02f85f340893d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -58,6 +58,7 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ScopedPrinter.h"
@@ -18776,8 +18777,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
@@ -18789,18 +18788,11 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 Intrinsic::ID IID;
 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
 switch (BuiltinID) {
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
-  ArgTy = llvm::Type::getFloatTy(getLLVMContext());
-  IID = Intrinsic::amdgcn_global_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
   ArgTy = llvm::FixedVectorType::get(
   llvm::Type::getHalfTy(getLLVMContext()), 2);
   IID = Intrinsic::amdgcn_global_atomic_fadd;
   break;
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
-  IID = Intrinsic::amdgcn_global_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   IID = Intrinsic::amdgcn_global_atomic_fmin;
   break;
@@ -19223,7 +19215,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
   case AMDGPU::BI__builtin_amdgcn_ds_faddf:
   case AMDGPU::BI__builtin_amdgcn_ds_fminf:
-  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
+  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19239,6 +19233,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
@@ -19273,8 +19269,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
   EmitScalarExpr(E->getArg(3)), AO, SSID);
 } else {
-  // The ds_atomic_fadd_* builtins do not have syncscope/order arguments.
-  SSID = llvm::SyncScope::System;
+  // Most of the builtins do not have syncscope/order arguments. For DS
+  // atomics the scope doesn't really matter, as they implicitly operate at
+  // workgroup scope.
+  //
+  // The global/flat cases need to use agent scope to consistently produce
+  // the native instruction instead of a cmpxchg expansion.
+  SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
   AO = AtomicOrdering::SequentiallyConsistent;
 
   // The v2bf16 builtin uses i16 instead of a natural bfloat type.
@@ -19289,6 +19290,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
 if (Volatile)
   RMW->setVolatile(true);
+
+unsigned AddrSpace = Ptr.getType()->getAddressSpace();
+if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
+  // Most targets require "amdgpu.no.fine.grained.memory" to emit the 

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

ping 

https://github.com/llvm/llvm-project/pull/96874
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

ping 

https://github.com/llvm/llvm-project/pull/96873
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (PR #96872)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

ping 

https://github.com/llvm/llvm-project/pull/96872
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)

2024-08-01 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

ping 

https://github.com/llvm/llvm-project/pull/96760
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Enable vectorization of v2f16 copysign (PR #100799)

2024-07-29 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100799

>From ba0f8f03dc491562050a65456f7ebda23a7e4210 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 22:36:33 +0400
Subject: [PATCH] AMDGPU: Enable vectorization of v2f16 copysign

---
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |   3 +
 .../Analysis/CostModel/AMDGPU/copysign.ll | 256 +-
 .../SLPVectorizer/AMDGPU/slp-v2f16.ll |  16 +-
 3 files changed, 139 insertions(+), 136 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index d09f4fb2f659b..9e89898b11bcb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -688,6 +688,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
   switch (ID) {
   case Intrinsic::fma:
   case Intrinsic::fmuladd:
+  case Intrinsic::copysign:
   // There's a small benefit to using vector ops in the legalized code.
   case Intrinsic::round:
   case Intrinsic::uadd_sat:
@@ -739,6 +740,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
  : getQuarterRateInstrCost(CostKind);
 }
 break;
+  case Intrinsic::copysign:
+return NElts * getFullRateInstrCost();
   case Intrinsic::uadd_sat:
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
index 3b7b1b4238b8a..06a058ff2e7b1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
@@ -12,90 +12,90 @@
 define void @copysign_f16() {
 ; BASE-LABEL: 'copysign_f16'
 ; BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = 
call half @llvm.copysign.f16(half undef, half undef)
-; BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 
= call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 
= call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 
= call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> 
undef)
-; BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> 
undef)
-; BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: 
%v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> 
undef)
-; BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: 
%v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> 
undef)
+; BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 
= call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3f16 
= call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
+; BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16 
= call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5f16 
= call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
+; BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 
= call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9f16 
= call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
+; BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> 
undef)
 ; BASE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
 ;
 ; GFX8-LABEL: 'copysign_f16'
 ; GFX8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = 
call half @llvm.copysign.f16(half undef, half undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16 
= call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v3f16 
= call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4f16 
= call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v5f16 
= call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-28 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100523

>From 6a7346484924acdfbd630096e3dbbb4b14474028 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

Also adjust the AMDGPU cost.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  32 +-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |   9 +-
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +-
 .../Analysis/CostModel/AMDGPU/arith-ssat.ll   |  32 +-
 .../Analysis/CostModel/AMDGPU/arith-usat.ll   |  32 +-
 5 files changed, 242 insertions(+), 231 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0b1ecc002ae25..8ae236850b982 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+  case Intrinsic::abs:
 return true;
   default:
 return false;
@@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
   if (SLT == MVT::f64)
 return LT.first * NElts * get64BitInstrCost(CostKind);
 
-  if ((ST->has16BitInsts() && SLT == MVT::f16) ||
+  if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
 
@@ -737,10 +738,16 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+// TODO: Full rate for i32/i16
 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
 if (any_of(ValidSatTys, [](MVT M) { return M == LT.second; }))
   NElts = 1;
 break;
+  case Intrinsic::abs:
+// Expansion takes 2 instructions for VALU
+if (SLT == MVT::i16 || SLT == MVT::i32)
+  InstRate = 2 * getFullRateInstrCost();
+break;
   }
 
   return LT.first * NElts * InstRate;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index f65615b07abc0..b86e99558377b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -14,116 +14,116 @@ define void @abs_nonpoison() {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)

2024-07-28 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100521

>From 19f7331a579837b2657a5d0741c6633d6f8296da Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:33:23 +0400
Subject: [PATCH] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  56 +--
 llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll | 116 ++---
 llvm/test/Analysis/CostModel/X86/fptoi_sat.ll | 400 +-
 .../AggressiveInstCombine/ARM/fptosisat.ll|  49 ++-
 4 files changed, 324 insertions(+), 297 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 1a089a3fa9634..ba70498bfb731 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2179,31 +2179,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::UMULO;
   break;
 case Intrinsic::fptosi_sat:
-case Intrinsic::fptoui_sat: {
-  if (Tys.empty())
-break;
-  Type *FromTy = Tys[0];
-  bool IsSigned = IID == Intrinsic::fptosi_sat;
-
-  InstructionCost Cost = 0;
-  IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
- {FromTy, FromTy});
-  Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
-  IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
- {FromTy, FromTy});
-  Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
-  Cost += thisT()->getCastInstrCost(
-  IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
-  TTI::CastContextHint::None, CostKind);
-  if (IsSigned) {
-Type *CondTy = RetTy->getWithNewBitWidth(1);
-Cost += thisT()->getCmpSelInstrCost(
-BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
-Cost += thisT()->getCmpSelInstrCost(
-BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, 
CostKind);
-  }
-  return Cost;
-}
+  ISD = ISD::FP_TO_SINT_SAT;
+  break;
+case Intrinsic::fptoui_sat:
+  ISD = ISD::FP_TO_UINT_SAT;
+  break;
 case Intrinsic::ctpop:
   ISD = ISD::CTPOP;
   // In case of legalization use TCC_Expensive. This is cheaper than a
@@ -2418,6 +2398,32 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::fptosi_sat:
+case Intrinsic::fptoui_sat: {
+  if (Tys.empty())
+break;
+  Type *FromTy = Tys[0];
+  bool IsSigned = IID == Intrinsic::fptosi_sat;
+
+  InstructionCost Cost = 0;
+  IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
+ {FromTy, FromTy});
+  Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
+  IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
+ {FromTy, FromTy});
+  Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
+  Cost += thisT()->getCastInstrCost(
+  IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
+  TTI::CastContextHint::None, CostKind);
+  if (IsSigned) {
+Type *CondTy = RetTy->getWithNewBitWidth(1);
+Cost += thisT()->getCmpSelInstrCost(
+BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
+Cost += thisT()->getCmpSelInstrCost(
+BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, 
CostKind);
+  }
+  return Cost;
+}
 default:
   break;
 }
diff --git a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll 
b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll
index aff7b19a9c87a..29c86fc778a98 100644
--- a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll
+++ b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll
@@ -4,26 +4,26 @@
 
 define void @casts() {
 ; CHECK-MVE-LABEL: 'casts'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: 
%f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: 
%f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: 
%f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: 
%f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: 
%f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: 
%f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: 
%f32s32 = call i32 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-28 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100519

>From 411c9c8f9fff386807a4ff6317dbec8a3eb1cd1a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  | 67 ++-
 .../Analysis/CostModel/X86/arith-overflow.ll  |  8 +--
 2 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index 963bb8a9d9fac..71bc6b5375c73 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1080,7 +1080,7 @@ define i32 @smul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 
= call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: 
%V16I8 = call { <16 x i8>, 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-28 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100520

>From fc18583308ccaaf60bd234af160888a669648fef Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:31:04 +0400
Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Correct costs of saturating add/sub intrinsics (PR #100808)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100808
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Correct costs of saturating add/sub intrinsics (PR #100808)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100808?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100808** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100808?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100523** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100522** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a>


 Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100808
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100523

>From b448d7ddbf60e4678daf2d8ec522a82ceca7d7a3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

Also adjust the AMDGPU cost.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  32 +-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |   9 +-
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +-
 .../Analysis/CostModel/AMDGPU/arith-ssat.ll   |  32 +-
 .../Analysis/CostModel/AMDGPU/arith-usat.ll   |  32 +-
 5 files changed, 242 insertions(+), 231 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0b1ecc002ae25..8ae236850b982 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+  case Intrinsic::abs:
 return true;
   default:
 return false;
@@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
   if (SLT == MVT::f64)
 return LT.first * NElts * get64BitInstrCost(CostKind);
 
-  if ((ST->has16BitInsts() && SLT == MVT::f16) ||
+  if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
 
@@ -737,10 +738,16 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+// TODO: Full rate for i32/i16
 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
 if (any_of(ValidSatTys, [](MVT M) { return M == LT.second; }))
   NElts = 1;
 break;
+  case Intrinsic::abs:
+// Expansion takes 2 instructions for VALU
+if (SLT == MVT::i16 || SLT == MVT::i32)
+  InstRate = 2 * getFullRateInstrCost();
+break;
   }
 
   return LT.first * NElts * InstRate;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index f65615b07abc0..b86e99558377b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -14,116 +14,116 @@ define void @abs_nonpoison() {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100520

>From c382d2f8f2e2d0660bd3f1db5007e2a5f3cfa3cc Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:31:04 +0400
Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100519

>From f154bdbc4048a943d23480ca00b894f0853bdf73 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  | 67 ++-
 .../Analysis/CostModel/X86/arith-overflow.ll  |  8 +--
 2 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index 963bb8a9d9fac..71bc6b5375c73 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1080,7 +1080,7 @@ define i32 @smul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 
= call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: 
%V16I8 = call { <16 x i8>, 

[llvm-branch-commits] [llvm] DAG: Lower fcNormal is.fpclass to compare with inf (PR #100389)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100389

>From f515257afc80ac1874ffb0e3d2697b2447a1bf5f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 1 Feb 2023 09:06:59 -0400
Subject: [PATCH] DAG: Lower fcNormal is.fpclass to compare with inf

Looks worse for x86 without the fabs check. Not sure if
this is useful for any targets.
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 25 +++
 1 file changed, 25 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 18cd368e24259..dcc65549d7a0e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8673,6 +8673,31 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
 IsOrdered ? OrderedOp : UnorderedOp);
   }
 }
+
+if (FPTestMask == fcNormal) {
+  // TODO: Handle unordered
+  ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
+  ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
+
+  if (isCondCodeLegalOrCustom(IsFiniteOp,
+  OperandVT.getScalarType().getSimpleVT()) &&
+  isCondCodeLegalOrCustom(IsNormalOp,
+  OperandVT.getScalarType().getSimpleVT()) &&
+  isFAbsFree(OperandVT)) {
+// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
+SDValue Inf =
+DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
+SDValue SmallestNormal = DAG.getConstantFP(
+APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
+
+SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
+SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
+SDValue IsNormal =
+DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
+unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
+return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
+  }
+}
   }
 
   // Some checks may be represented as inversion of simpler check, for example

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100380

>From 6226f310c474650b267a41d2509df5d0396ac481 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 1 Feb 2023 09:52:34 -0400
Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp

InstCombine also should have taken care of this, but this
should be helpful when the fcmp based lowering strategy tries
to combine multiple tests.
---
 llvm/lib/CodeGen/CodeGenCommonISel.cpp|   2 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  25 +++-
 llvm/test/CodeGen/AArch64/isinf.ll|  22 ++-
 llvm/test/CodeGen/X86/is_fpclass-fp80.ll  |  52 +++
 llvm/test/CodeGen/X86/is_fpclass.ll   | 137 +-
 5 files changed, 127 insertions(+), 111 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp 
b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index 88c643c568027..942cf442e9098 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest 
Test, bool UseFCmp) {
   case fcSubnormal | fcZero | fcNan:
 return InvertedTest;
   case fcInf | fcNan:
+  case fcPosInf | fcNan:
+  case fcNegInf | fcNan:
 // If we're trying to use fcmp, we can take advantage of the nan check
 // behavior of the compare (but this is more instructions in the integer
 // expansion).
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1e12d7937ba79..18cd368e24259 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8628,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
   return DAG.getSetCC(DL, ResultVT, Op, Op,
   IsInvertedFP ? ISD::SETO : ISD::SETUO);
 
-bool IsOrderedInf = FPTestMask == fcInf;
-if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
-isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
- : UnorderedCmpOpcode,
-OperandVT.getScalarType().getSimpleVT()) &&
-isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+if (OrderedFPTestMask == fcInf &&
+isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
+  : UnorderedCmpOpcode,
+OperandVT.getScalarType().getSimpleVT())) {
   // isinf(x) --> fabs(x) == inf
   SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
   SDValue Inf =
   DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
   return DAG.getSetCC(DL, ResultVT, Abs, Inf,
-  IsOrderedInf ? OrderedCmpOpcode : 
UnorderedCmpOpcode);
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
+}
+
+if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) {
+  // isposinf(x) --> x == inf
+  // isneginf(x) --> x == -inf
+  // isposinf(x) || nan --> x u== inf
+  // isneginf(x) || nan --> x u== -inf
+
+  SDValue Inf = DAG.getConstantFP(
+  APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
+  OperandVT);
+  return DAG.getSetCC(DL, ResultVT, Op, Inf,
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
 }
 
 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll 
b/llvm/test/CodeGen/AArch64/isinf.ll
index 834417b98743a..458bd7eeba16c 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,14 +58,22 @@ define i32 @replace_isinf_call_f64(double %x) {
 define i32 @replace_isinf_call_f128(fp128 %x) {
 ; CHECK-LABEL: replace_isinf_call_f128:
 ; CHECK:   // %bb.0:
-; CHECK-NEXT:str q0, [sp, #-16]!
-; CHECK-NEXT:.cfi_def_cfa_offset 16
-; CHECK-NEXT:ldp x9, x8, [sp], #16
-; CHECK-NEXT:and x8, x8, #0x7fff
-; CHECK-NEXT:eor x8, x8, #0x7fff
-; CHECK-NEXT:orr x8, x9, x8
-; CHECK-NEXT:cmp x8, #0
+; CHECK-NEXT:sub sp, sp, #32
+; CHECK-NEXT:str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:.cfi_def_cfa_offset 32
+; CHECK-NEXT:.cfi_offset w30, -16
+; CHECK-NEXT:str q0, [sp]
+; CHECK-NEXT:ldrb w8, [sp, #15]
+; CHECK-NEXT:and w8, w8, #0x7f
+; CHECK-NEXT:strb w8, [sp, #15]
+; CHECK-NEXT:adrp x8, .LCPI3_0
+; CHECK-NEXT:ldr q0, [sp]
+; CHECK-NEXT:ldr q1, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT:bl __eqtf2
+; CHECK-NEXT:cmp w0, #0
+; CHECK-NEXT:ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:cset w0, eq
+; CHECK-NEXT:add sp, sp, #32
 ; CHECK-NEXT:ret
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
   %cmpinf = fcmp oeq fp128 %abs, 0xL7FFF
diff --git 

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Select all constants in tablegen (PR #100788)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100788
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Cleanup immediate selection patterns (PR #100787)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100787
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Select all constants in tablegen (PR #100788)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100788?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100788** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100788?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100787** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100787?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100786** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100786?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a>


 Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100788
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Cleanup immediate selection patterns (PR #100787)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100787?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100788** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100788?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100787** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100787?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100786** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100786?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a>


 Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100787
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Select all constants in tablegen (PR #100788)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100788

This regresses the arbitrary address space pointer case. Ideally
we could write a pattern that matches a pointer based only on
its size, but using iPTR/iPTRAny seem to not work for this.

>From e75e929777d8ffc856427fdf70df10a94650cd26 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 26 Jul 2024 21:32:22 +0400
Subject: [PATCH] AMDGPU/GlobalISel: Select all constants in tablegen

This regresses the arbitrary address space pointer case. Ideally
we could write a pattern that matches a pointer based only on
its size, but using iPTR/iPTRAny seem to not work for this.
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  97 +-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   1 -
 llvm/lib/Target/AMDGPU/SIInstructions.td  |  44 ---
 .../GlobalISel/inst-select-constant.mir   | 120 ++
 4 files changed, 62 insertions(+), 200 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9a73629b0f0cd..73f3921b2ff4c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2503,98 +2503,6 @@ bool 
AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr ) const {
   return false;
 }
 
-bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr ) const {
-  if (selectImpl(I, *CoverageInfo))
-return true;
-
-  // FIXME: Relying on manual selection for 64-bit case, and pointer typed
-  // constants.
-  MachineBasicBlock *BB = I.getParent();
-  MachineOperand  = I.getOperand(1);
-  Register DstReg = I.getOperand(0).getReg();
-  LLT Ty = MRI->getType(DstReg);
-  unsigned Size = Ty.getSizeInBits();
-  assert((Size == 64 || Ty.isPointer()) &&
- "patterns should have selected this");
-
-  bool IsFP = false;
-
-  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
-  if (ImmOp.isFPImm()) {
-const APInt  = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
-ImmOp.ChangeToImmediate(Imm.getZExtValue());
-IsFP = true;
-  } else if (ImmOp.isCImm()) {
-ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue());
-  } else {
-llvm_unreachable("Not supported by g_constants");
-  }
-
-  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
-  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;
-
-  unsigned Opcode;
-  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
-Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
-  } else if (Size == 64 &&
- AMDGPU::isValid32BitLiteral(I.getOperand(1).getImm(), IsFP)) {
-Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
-I.setDesc(TII.get(Opcode));
-I.addImplicitDefUseOperands(*MF);
-return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-  } else {
-Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
-
-// We should never produce s1 values on banks other than VCC. If the user 
of
-// this already constrained the register, we may incorrectly think it's VCC
-// if it wasn't originally.
-if (Size == 1)
-  return false;
-  }
-
-  if (Size != 64) {
-I.setDesc(TII.get(Opcode));
-I.addImplicitDefUseOperands(*MF);
-return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-  }
-
-  const DebugLoc  = I.getDebugLoc();
-
-  APInt Imm(Size, I.getOperand(1).getImm());
-
-  MachineInstr *ResInst;
-  if (IsSgpr && TII.isInlineConstant(Imm)) {
-ResInst = BuildMI(*BB, , DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
-  .addImm(I.getOperand(1).getImm());
-  } else {
-const TargetRegisterClass *RC = IsSgpr ?
-  ::SReg_32RegClass : ::VGPR_32RegClass;
-Register LoReg = MRI->createVirtualRegister(RC);
-Register HiReg = MRI->createVirtualRegister(RC);
-
-BuildMI(*BB, , DL, TII.get(Opcode), LoReg)
-  .addImm(Imm.trunc(32).getZExtValue());
-
-BuildMI(*BB, , DL, TII.get(Opcode), HiReg)
-  .addImm(Imm.ashr(32).getZExtValue());
-
-ResInst = BuildMI(*BB, , DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
-  .addReg(LoReg)
-  .addImm(AMDGPU::sub0)
-  .addReg(HiReg)
-  .addImm(AMDGPU::sub1);
-  }
-
-  // We can't call constrainSelectedInstRegOperands here, because it doesn't
-  // work for target independent opcodes
-  I.eraseFromParent();
-  const TargetRegisterClass *DstRC =
-TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
-  if (!DstRC)
-return true;
-  return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
-}
-
 bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr ) const {
   // Only manually handle the f64 SGPR case.
   //
@@ -3521,9 +3429,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr ) {
   case TargetOpcode::G_PTRTOINT:
   case TargetOpcode::G_FREEZE:
 return selectCOPY(I);
-  case TargetOpcode::G_CONSTANT:
-  case TargetOpcode::G_FCONSTANT:
-return 

[llvm-branch-commits] [llvm] AMDGPU: Cleanup immediate selection patterns (PR #100787)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100787

Reorder for consistency, so the same types for v/s are together.

>From 794f20ecd9df0024481842bce8dd9e7d9e3684cb Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 26 Jul 2024 17:08:26 +0400
Subject: [PATCH] AMDGPU: Cleanup immediate selection patterns

Reorder for consistency, so the same types for v/s are together.
---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 79 
 1 file changed, 41 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td 
b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d2101654d2acb..bcf778b31d276 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2138,19 +2138,26 @@ def : GCNPat <
 /** Immediate Patterns **/
 /** == **/
 
+// FIXME: Remove VGPRImm. Should be inferrable from register bank.
+
 def : GCNPat <
   (VGPRImm<(i32 imm)>:$imm),
   (V_MOV_B32_e32 imm:$imm)
 >;
 
 def : GCNPat <
-  (VGPRImm<(f32 fpimm)>:$imm),
-  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
+  (i32 imm:$imm),
+  (S_MOV_B32 imm:$imm)
 >;
 
 def : GCNPat <
-  (i32 imm:$imm),
-  (S_MOV_B32 imm:$imm)
+  (p5 frameindex:$fi),
+  (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi)))
+>;
+
+def : GCNPat <
+  (p5 frameindex:$fi),
+  (S_MOV_B32 (p5 (frameindex_to_targetframeindex $fi)))
 >;
 
 def : GCNPat <
@@ -2168,40 +2175,34 @@ def : GCNPat <
   (V_MOV_B32_e32 imm:$imm)
 >;
 
-// FIXME: Workaround for ordering issue with peephole optimizer where
-// a register class copy interferes with immediate folding.  Should
-// use s_mov_b32, which can be shrunk to s_movk_i32
 def : GCNPat <
-  (VGPRImm<(f16 fpimm)>:$imm),
-  (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
+  (i16 imm:$imm),
+  (S_MOV_B32 imm:$imm)
 >;
 
 def : GCNPat <
-  (VGPRImm<(bf16 fpimm)>:$imm),
-  (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
+  (VGPRImm<(f16 fpimm)>:$imm),
+  (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
 >;
 
-// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
-// immediate and wil be expanded as needed, but we will only use these patterns
-// for values which can be encoded.
 def : GCNPat <
-  (VGPRImm<(i64 imm)>:$imm),
-  (V_MOV_B64_PSEUDO imm:$imm)
+  (f16 fpimm:$imm),
+  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : GCNPat <
-  (VGPRImm<(f64 fpimm)>:$imm),
-  (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm)))
+  (VGPRImm<(bf16 fpimm)>:$imm),
+  (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : GCNPat <
-  (i64 imm:$imm),
-  (S_MOV_B64_IMM_PSEUDO imm:$imm)
+  (bf16 fpimm:$imm),
+  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : GCNPat <
-  (f64 fpimm:$imm),
-  (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm)))
+  (VGPRImm<(f32 fpimm)>:$imm),
+  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : GCNPat <
@@ -2210,31 +2211,38 @@ def : GCNPat <
 >;
 
 def : GCNPat <
-  (f16 fpimm:$imm),
-  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
+  (VGPRImm<(i64 imm)>:$imm),
+  (V_MOV_B64_PSEUDO imm:$imm)
 >;
 
 def : GCNPat <
-  (bf16 fpimm:$imm),
-  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
+  (i64 InlineImm64:$imm),
+  (S_MOV_B64 InlineImm64:$imm)
 >;
 
 def : GCNPat <
-  (p5 frameindex:$fi),
-  (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi)))
+  (i64 imm:$imm),
+  (S_MOV_B64_IMM_PSEUDO imm:$imm)
 >;
 
 def : GCNPat <
-  (p5 frameindex:$fi),
-  (S_MOV_B32 (p5 (frameindex_to_targetframeindex $fi)))
+  (VGPRImm<(f64 fpimm)>:$imm),
+  (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm)))
 >;
 
+// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
+// immediate and wil be expanded as needed, but we will only use these patterns
+// for values which can be encoded.
 def : GCNPat <
-  (i64 InlineImm64:$imm),
-  (S_MOV_B64 InlineImm64:$imm)
+  (f64 InlineImmFP64:$imm),
+  (S_MOV_B64 (i64 (bitcast_fpimm_to_i64 $imm)))
+>;
+
+def : GCNPat <
+  (f64 fpimm:$imm),
+  (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm)))
 >;
 
-// Set to sign-extended 64-bit value (true = -1, false = 0)
 // Set to sign-extended 64-bit value (true = -1, false = 0)
 def : GCNPat <(i1 imm:$imm),
   (S_MOV_B64 imm:$imm)> {
@@ -2246,11 +2254,6 @@ def : GCNPat <(i1 imm:$imm),
   let WaveSizePredicate = isWave32;
 }
 
-def : GCNPat <
-  (f64 InlineImmFP64:$imm),
-  (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineImmFP64:$imm)))
->;
-
 /** == **/
 /** Intrinsic Patterns **/
 /** == **/

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DAG: Lower fcNormal is.fpclass to compare with inf (PR #100389)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100389

>From fcfbc51749e1a8289d88eeea504cdf2af94c6cf0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 1 Feb 2023 09:06:59 -0400
Subject: [PATCH] DAG: Lower fcNormal is.fpclass to compare with inf

Looks worse for x86 without the fabs check. Not sure if
this is useful for any targets.
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 25 +++
 1 file changed, 25 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 18cd368e24259..dcc65549d7a0e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8673,6 +8673,31 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
 IsOrdered ? OrderedOp : UnorderedOp);
   }
 }
+
+if (FPTestMask == fcNormal) {
+  // TODO: Handle unordered
+  ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
+  ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
+
+  if (isCondCodeLegalOrCustom(IsFiniteOp,
+  OperandVT.getScalarType().getSimpleVT()) &&
+  isCondCodeLegalOrCustom(IsNormalOp,
+  OperandVT.getScalarType().getSimpleVT()) &&
+  isFAbsFree(OperandVT)) {
+// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
+SDValue Inf =
+DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
+SDValue SmallestNormal = DAG.getConstantFP(
+APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
+
+SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
+SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
+SDValue IsNormal =
+DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
+unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
+return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
+  }
+}
   }
 
   // Some checks may be represented as inversion of simpler check, for example

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100380

>From fc46244e25e7dc86354a6fb42316788eab883198 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 1 Feb 2023 09:52:34 -0400
Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp

InstCombine also should have taken care of this, but this
should be helpful when the fcmp based lowering strategy tries
to combine multiple tests.
---
 llvm/lib/CodeGen/CodeGenCommonISel.cpp|   2 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  25 +++-
 llvm/test/CodeGen/AArch64/isinf.ll|  22 ++-
 llvm/test/CodeGen/X86/is_fpclass-fp80.ll  |  52 +++
 llvm/test/CodeGen/X86/is_fpclass.ll   | 137 +-
 5 files changed, 127 insertions(+), 111 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp 
b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index 88c643c568027..942cf442e9098 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest 
Test, bool UseFCmp) {
   case fcSubnormal | fcZero | fcNan:
 return InvertedTest;
   case fcInf | fcNan:
+  case fcPosInf | fcNan:
+  case fcNegInf | fcNan:
 // If we're trying to use fcmp, we can take advantage of the nan check
 // behavior of the compare (but this is more instructions in the integer
 // expansion).
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1e12d7937ba79..18cd368e24259 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8628,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
   return DAG.getSetCC(DL, ResultVT, Op, Op,
   IsInvertedFP ? ISD::SETO : ISD::SETUO);
 
-bool IsOrderedInf = FPTestMask == fcInf;
-if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
-isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
- : UnorderedCmpOpcode,
-OperandVT.getScalarType().getSimpleVT()) &&
-isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+if (OrderedFPTestMask == fcInf &&
+isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
+  : UnorderedCmpOpcode,
+OperandVT.getScalarType().getSimpleVT())) {
   // isinf(x) --> fabs(x) == inf
   SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
   SDValue Inf =
   DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
   return DAG.getSetCC(DL, ResultVT, Abs, Inf,
-  IsOrderedInf ? OrderedCmpOpcode : 
UnorderedCmpOpcode);
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
+}
+
+if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) {
+  // isposinf(x) --> x == inf
+  // isneginf(x) --> x == -inf
+  // isposinf(x) || nan --> x u== inf
+  // isneginf(x) || nan --> x u== -inf
+
+  SDValue Inf = DAG.getConstantFP(
+  APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
+  OperandVT);
+  return DAG.getSetCC(DL, ResultVT, Op, Inf,
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
 }
 
 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll 
b/llvm/test/CodeGen/AArch64/isinf.ll
index 834417b98743a..458bd7eeba16c 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,14 +58,22 @@ define i32 @replace_isinf_call_f64(double %x) {
 define i32 @replace_isinf_call_f128(fp128 %x) {
 ; CHECK-LABEL: replace_isinf_call_f128:
 ; CHECK:   // %bb.0:
-; CHECK-NEXT:str q0, [sp, #-16]!
-; CHECK-NEXT:.cfi_def_cfa_offset 16
-; CHECK-NEXT:ldp x9, x8, [sp], #16
-; CHECK-NEXT:and x8, x8, #0x7fff
-; CHECK-NEXT:eor x8, x8, #0x7fff
-; CHECK-NEXT:orr x8, x9, x8
-; CHECK-NEXT:cmp x8, #0
+; CHECK-NEXT:sub sp, sp, #32
+; CHECK-NEXT:str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:.cfi_def_cfa_offset 32
+; CHECK-NEXT:.cfi_offset w30, -16
+; CHECK-NEXT:str q0, [sp]
+; CHECK-NEXT:ldrb w8, [sp, #15]
+; CHECK-NEXT:and w8, w8, #0x7f
+; CHECK-NEXT:strb w8, [sp, #15]
+; CHECK-NEXT:adrp x8, .LCPI3_0
+; CHECK-NEXT:ldr q0, [sp]
+; CHECK-NEXT:ldr q1, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT:bl __eqtf2
+; CHECK-NEXT:cmp w0, #0
+; CHECK-NEXT:ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:cset w0, eq
+; CHECK-NEXT:add sp, sp, #32
 ; CHECK-NEXT:ret
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
   %cmpinf = fcmp oeq fp128 %abs, 0xL7FFF
diff --git 

[llvm-branch-commits] [llvm] DAG: Handle lowering unordered compare with inf (PR #100378)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100378
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] CodeGen: Remove MachineModuleInfo reference from MachineFunction (PR #100357)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jul 26, 4:56 AM EDT**: @arsenm started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100357).


https://github.com/llvm/llvm-project/pull/100357
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jul 26, 4:56 AM EDT**: @arsenm started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100333).


https://github.com/llvm/llvm-project/pull/100333
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] CodeGen: Remove MachineModuleInfo reference from MachineFunction (PR #100357)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100357

>From 88bb03cf2b3587d08ee5b73fbacb7b6c3bec1b40 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sat, 20 Jul 2024 14:24:23 +0400
Subject: [PATCH 1/3] CodeGen: Remove MachineModuleInfo reference from
 MachineFunction

This avoids another unserializable field. Move the DbgInfoAvailable
field into the AsmPrinter; the field is really only a cache/convenience
bit for a direct IR module metadata check.
---
 llvm/include/llvm/CodeGen/AsmPrinter.h |  6 ++
 llvm/include/llvm/CodeGen/MachineFunction.h| 18 --
 llvm/include/llvm/CodeGen/MachineModuleInfo.h  |  6 --
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 17 -
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp  |  4 ++--
 .../CodeGen/AsmPrinter/DebugHandlerBase.cpp|  4 ++--
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp |  2 +-
 llvm/lib/CodeGen/MachineFunction.cpp   | 12 ++--
 llvm/lib/CodeGen/MachineFunctionAnalysis.cpp   |  2 +-
 llvm/lib/CodeGen/MachineModuleInfo.cpp |  5 +
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp  |  4 +---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp  | 12 ++--
 llvm/tools/llvm-reduce/ReducerWorkItem.cpp |  2 +-
 .../CodeGen/AArch64SelectionDAGTest.cpp|  4 ++--
 llvm/unittests/CodeGen/InstrRefLDVTest.cpp |  2 +-
 llvm/unittests/CodeGen/MFCommon.inc|  3 ++-
 .../SelectionDAGAddressAnalysisTest.cpp|  2 +-
 .../CodeGen/SelectionDAGPatternMatchTest.cpp   |  2 +-
 .../AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp   |  3 ++-
 llvm/unittests/Target/AMDGPU/PALMetadata.cpp   |  2 +-
 .../Target/RISCV/RISCVInstrInfoTest.cpp|  2 +-
 21 files changed, 54 insertions(+), 60 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h 
b/llvm/include/llvm/CodeGen/AsmPrinter.h
index f57be39076a783..36d1b479738704 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -225,6 +225,9 @@ class AsmPrinter : public MachineFunctionPass {
   /// split stack prologue.
   bool HasNoSplitStack = false;
 
+  /// True if debugging information is available in this module.
+  bool DbgInfoAvailable = false;
+
 protected:
   explicit AsmPrinter(TargetMachine , std::unique_ptr Streamer);
 
@@ -430,6 +433,9 @@ class AsmPrinter : public MachineFunctionPass {
   /// Get the CFISection type for the module.
   CFISection getModuleCFISectionType() const { return ModuleCFISection; }
 
+  /// Returns true if valid debug info is present.
+  bool hasDebugInfo() const { return DbgInfoAvailable; }
+
   bool needsSEHMoves();
 
   /// Since emitting CFI unwind information is entangled with supporting the
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h 
b/llvm/include/llvm/CodeGen/MachineFunction.h
index 6e7292abeddbbd..142570b9ce551e 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -260,7 +260,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
   const LLVMTargetMachine 
   const TargetSubtargetInfo *STI;
   MCContext 
-  MachineModuleInfo 
 
   // RegInfo - Information about each register in use in the function.
   MachineRegisterInfo *RegInfo;
@@ -395,15 +394,15 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
 
   /// \}
 
-  /// Clear all the members of this MachineFunction, but the ones used
-  /// to initialize again the MachineFunction.
-  /// More specifically, this deallocates all the dynamically allocated
-  /// objects and get rid of all the XXXInfo data structure, but keep
-  /// unchanged the references to Fn, Target, MMI, and FunctionNumber.
+  /// Clear all the members of this MachineFunction, but the ones used to
+  /// initialize again the MachineFunction.  More specifically, this 
deallocates
+  /// all the dynamically allocated objects and get rid of all the XXXInfo data
+  /// structure, but keep unchanged the references to Fn, Target, and
+  /// FunctionNumber.
   void clear();
   /// Allocate and initialize the different members.
   /// In particular, the XXXInfo data structure.
-  /// \pre Fn, Target, MMI, and FunctionNumber are properly set.
+  /// \pre Fn, Target, and FunctionNumber are properly set.
   void init();
 
 public:
@@ -632,8 +631,8 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
   const static unsigned int DebugOperandMemNumber;
 
   MachineFunction(Function , const LLVMTargetMachine ,
-  const TargetSubtargetInfo , unsigned FunctionNum,
-  MachineModuleInfo );
+  const TargetSubtargetInfo , MCContext ,
+  unsigned FunctionNum);
   MachineFunction(const MachineFunction &) = delete;
   MachineFunction =(const MachineFunction &) = delete;
   ~MachineFunction();
@@ -665,7 +664,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
 
   GISelChangeObserver *getObserver() const { return Observer; }
 
-  MachineModuleInfo () const { 

[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100333

>From 9f6b09e1041ed88c95a7c51ac441769f4f82cfd6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 13:11:04 +0400
Subject: [PATCH] DebugInfo: Avoid some MMI::hasDebugInfo checks

I assume getSubprogram will do the correct thing in hasDebugInfo,
and this is redundant with the debug_compile_units distance check.
This is in preparation for removing the field.
---
 llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++--
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp   | 7 ---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 6c70c47de8822..ed99eb3c459e5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
  Ty->getTag() == dwarf::DW_TAG_unspecified_type;
 }
 
-static bool hasDebugInfo(const MachineModuleInfo *MMI,
- const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo())
-return false;
+static bool hasDebugInfo(const MachineFunction *MF) {
   auto *SP = MF->getFunction().getSubprogram();
   if (!SP)
 return false;
@@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI,
 void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
   PrevInstBB = nullptr;
 
-  if (!Asm || !hasDebugInfo(MMI, MF)) {
+  if (!Asm || !hasDebugInfo(MF)) {
 skippedNonDebugFunction();
 return;
   }
@@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() {
 }
 
 void DebugHandlerBase::endFunction(const MachineFunction *MF) {
-  if (Asm && hasDebugInfo(MMI, MF))
+  if (Asm && hasDebugInfo(MF))
 endFunctionImpl(MF);
   DbgValues.clear();
   DbgLabels.clear();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5f1f315c5ab24..fbce7e92b7781 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1148,14 +1148,15 @@ 
sortGlobalExprs(SmallVectorImpl ) {
 void DwarfDebug::beginModule(Module *M) {
   DebugHandlerBase::beginModule(M);
 
-  if (!Asm || !MMI->hasDebugInfo())
+  if (!Asm)
 return;
 
   unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
+  if (NumDebugCUs == 0)
+return;
+
   assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
-  assert(MMI->hasDebugInfo() &&
- "DebugInfoAvailabilty unexpectedly not initialized");
   SingleCU = NumDebugCUs == 1;
   DenseMap>
   GVMap;
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp 
b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0b654abd2814c..b4eba07afe7c5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() {
   // Emit initial .loc debug directive for correct relocation symbol data.
   if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
 assert(SP->getUnit());
-if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo())
+if (!SP->getUnit()->isDebugDirectivesOnly())
   emitInitialRawDwarfLocDirective(*MF);
   }
 }
@@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module , raw_ostream ,
 if (HasFullDebugInfo)
   break;
   }
-  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
+  if (HasFullDebugInfo)
 O << ", debug";
 
   O << "\n";

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] CodeGen: Move current call site out of MachineModuleInfo (PR #100369)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jul 26, 3:21 AM EDT**: @arsenm started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100369).


https://github.com/llvm/llvm-project/pull/100369
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] CodeGen: Remove MachineModuleInfo reference from MachineFunction (PR #100357)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100357
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100333
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] CodeGen: Move current call site out of MachineModuleInfo (PR #100369)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100369

>From 3069e94a57f37b11c466b5cd1b71fde4f538a861 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 17:00:00 +0400
Subject: [PATCH 1/2] CodeGen: Move current call site out of MachineModuleInfo

I do not understand what this is for, but it's only used in
SelectionDAGBuilder, so move it to FunctionLoweringInfo like other
function scope DAG builder state. The intrinsics are not documented
in the LangRef or Intrinsics.td.

This removes the last piece of codegen state from MachineModuleInfo.
---
 .../llvm/CodeGen/FunctionLoweringInfo.h   | 17 +
 llvm/include/llvm/CodeGen/MachineModuleInfo.h | 24 ---
 llvm/lib/CodeGen/MachineModuleInfo.cpp|  2 --
 .../SelectionDAG/SelectionDAGBuilder.cpp  | 10 
 4 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h 
b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 45a47d7333e35..fa75d883e451c 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -183,11 +183,28 @@ class FunctionLoweringInfo {
   std::vector > PHINodesToUpdate;
   unsigned OrigNumPHINodesToUpdate;
 
+  /// \name Exception Handling
+  /// \{
+
   /// If the current MBB is a landing pad, the exception pointer and exception
   /// selector registers are copied into these virtual registers by
   /// SelectionDAGISel::PrepareEHLandingPad().
   unsigned ExceptionPointerVirtReg, ExceptionSelectorVirtReg;
 
+  /// The current call site index being processed, if any. 0 if none.
+  unsigned CurCallSite = 0;
+  // TODO: Ideally, what we'd like is to have a switch that allows emitting
+  // synchronous (precise at call-sites only) CFA into .eh_frame. However,
+  // even under this switch, we'd like .debug_frame to be precise when using
+  // -g. At this moment, there's no way to specify that some CFI directives
+  // go into .eh_frame only, while others go into .debug_frame only.
+
+  /// Set the call site currently being processed.
+  void setCurrentCallSite(unsigned Site) { CurCallSite = Site; }
+
+  /// Get the call site currently being processed, if any. Return zero if none.
+  unsigned getCurrentCallSite() { return CurCallSite; }
+
   /// Collection of dbg.declare instructions handled after argument
   /// lowering and before ISel proper.
   SmallPtrSet PreprocessedDbgDeclares;
diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h 
b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index dfa0e993ec06a..f054c56bb641c 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -99,20 +99,6 @@ class MachineModuleInfo {
   /// want.
   MachineModuleInfoImpl *ObjFileMMI;
 
-  /// \name Exception Handling
-  /// \{
-
-  /// The current call site index being processed, if any. 0 if none.
-  unsigned CurCallSite = 0;
-
-  /// \}
-
-  // TODO: Ideally, what we'd like is to have a switch that allows emitting
-  // synchronous (precise at call-sites only) CFA into .eh_frame. However,
-  // even under this switch, we'd like .debug_frame to be precise when using
-  // -g. At this moment, there's no way to specify that some CFI directives
-  // go into .eh_frame only, while others go into .debug_frame only.
-
   /// True if debugging information is available in this module.
   bool DbgInfoAvailable = false;
 
@@ -185,16 +171,6 @@ class MachineModuleInfo {
   /// Returns true if valid debug info is present.
   bool hasDebugInfo() const { return DbgInfoAvailable; }
 
-  /// \name Exception Handling
-  /// \{
-
-  /// Set the call site currently being processed.
-  void setCurrentCallSite(unsigned Site) { CurCallSite = Site; }
-
-  /// Get the call site currently being processed, if any.  return zero if
-  /// none.
-  unsigned getCurrentCallSite() { return CurCallSite; }
-
   /// \}
 }; // End class MachineModuleInfo
 
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp 
b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 150ab363c8fcd..f382df1d2a6e0 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -26,7 +26,6 @@ MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
 
 void MachineModuleInfo::initialize() {
   ObjFileMMI = nullptr;
-  CurCallSite = 0;
   NextFnNum = 0;
   DbgInfoAvailable = false;
 }
@@ -46,7 +45,6 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
   MachineFunctions(std::move(MMI.MachineFunctions)) {
   Context.setObjectFileInfo(TM.getObjFileLowering());
   ObjFileMMI = MMI.ObjFileMMI;
-  CurCallSite = MMI.CurCallSite;
   ExternalContext = MMI.ExternalContext;
   TheModule = MMI.TheModule;
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1791f1b503379..c554c0f5b6fd7 100644
--- 

[llvm-branch-commits] [llvm] CodeGen: Remove UsesMSVCFloatingPoint from MachineModuleInfo (PR #100368)

2024-07-26 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100368

>From db429d7de96c0b5c80b015adc73a13025f93d4ad Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 17 Apr 2022 10:28:14 -0400
Subject: [PATCH 1/2] CodeGen: Remove UsesMSVCFloatingPoint from
 MachineModuleInfo

This is only used by x86 and only used in the AsmPrinter module pass. I
think implementing this by looking at the underlying IR types instead
of the selected instructions is a pretty horrifying implementation,
but it's still available in the AsmPrinter.

This is https://reviews.llvm.org/D123933 resurrected.

I still don't know what the point of emitting _fltused is, but this
approach of looking at the IR types probably isn't the right way to
do this in the first place. If the intent is to report any FP
instructions, this will miss any implicitly introduced ones during
codegen. I also don't know why just unconditionally emitting it isn't an
option.

The last review mentioned the ARMs might want to emit this, but I'm
not going to go fix that. If someone wants to emit this on ARM, they
can move this to a common helper or analysis somewhere.
---
 llvm/include/llvm/CodeGen/MachineModuleInfo.h |  8 --
 llvm/lib/CodeGen/MachineModuleInfo.cpp|  1 -
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 27 ---
 llvm/lib/Target/X86/X86AsmPrinter.cpp | 25 -
 4 files changed, 24 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h 
b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index 97b439c726b0a..dfa0e993ec06a 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -116,10 +116,6 @@ class MachineModuleInfo {
   /// True if debugging information is available in this module.
   bool DbgInfoAvailable = false;
 
-  /// True if this module is being built for windows/msvc, and uses floating
-  /// point.  This is used to emit an undefined reference to _fltused.
-  bool UsesMSVCFloatingPoint = false;
-
   /// Maps IR Functions to their corresponding MachineFunctions.
   DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions;
   /// Next unique number available for a MachineFunction.
@@ -189,10 +185,6 @@ class MachineModuleInfo {
   /// Returns true if valid debug info is present.
   bool hasDebugInfo() const { return DbgInfoAvailable; }
 
-  bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; }
-
-  void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; }
-
   /// \name Exception Handling
   /// \{
 
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp 
b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 088e76029f1a3..150ab363c8fcd 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -28,7 +28,6 @@ void MachineModuleInfo::initialize() {
   ObjFileMMI = nullptr;
   CurCallSite = 0;
   NextFnNum = 0;
-  UsesMSVCFloatingPoint = false;
   DbgInfoAvailable = false;
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 401d23b22adcd..84331d257a3d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -417,30 +417,6 @@ void 
SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
- MachineModuleInfo &MMI) {
-  // Only needed for MSVC
-  if (!TT.isWindowsMSVCEnvironment())
-return;
-
-  // If it's already set, nothing to do.
-  if (MMI.usesMSVCFloatingPoint())
-return;
-
-  for (const Instruction &I : instructions(F)) {
-if (I.getType()->isFPOrFPVectorTy()) {
-  MMI.setUsesMSVCFloatingPoint(true);
-  return;
-}
-for (const auto &Op : I.operands()) {
-  if (Op->getType()->isFPOrFPVectorTy()) {
-MMI.setUsesMSVCFloatingPoint(true);
-return;
-  }
-}
-  }
-}
-
 PreservedAnalyses
 SelectionDAGISelPass::run(MachineFunction ,
   MachineFunctionAnalysisManager ) {
@@ -802,9 +778,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction 
) {
 }
   }
 
-  // Determine if floating point is used for msvc
-  computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, *CurDAG->getMMI());
-
   // Release function-specific state. SDB and CurDAG are already cleared
   // at this point.
   FuncInfo->clear();
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp 
b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 0c2c6bf7f8b70..9d86a9c9d1609 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGenTypes/MachineValueType.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
@@ -975,6 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100523

>From 49db2b2b9855d18df6449b6dedf7e50ccc1d6265 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

Also adjust the AMDGPU cost.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  32 +-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |   9 +-
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +-
 3 files changed, 210 insertions(+), 199 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0b1ecc002ae25..8ae236850b982 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+  case Intrinsic::abs:
 return true;
   default:
 return false;
@@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
   if (SLT == MVT::f64)
 return LT.first * NElts * get64BitInstrCost(CostKind);
 
-  if ((ST->has16BitInsts() && SLT == MVT::f16) ||
+  if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
 
@@ -737,10 +738,16 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+// TODO: Full rate for i32/i16
 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
 if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
   NElts = 1;
 break;
+  case Intrinsic::abs:
+// Expansion takes 2 instructions for VALU
+if (SLT == MVT::i16 || SLT == MVT::i32)
+  InstRate = 2 * getFullRateInstrCost();
+break;
   }
 
   return LT.first * NElts * InstRate;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index f65615b07abc0..b86e99558377b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -14,116 +14,116 @@ define void @abs_nonpoison() {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100520

>From 1d17da3e7cd5253d0c7a9bb8acc5989d1e5ba615 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:31:04 +0400
Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100519

>From 3d683da35b98db6dd0b5a94692b735765a6f776f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  67 +-
 .../Analysis/CostModel/X86/arith-overflow.ll  | 120 +-
 2 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index c5da46af04367..28d53042d4c21 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x 
i16> undef, <16 x i16> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 148 

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jul 25, 4:25 PM EDT**: @arsenm started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100522).


https://github.com/llvm/llvm-project/pull/100522
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100523

>From 949edfeeecddb315bf95dd82be99c57a4711c30a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

Also adjust the AMDGPU cost.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  32 +-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |   9 +-
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +-
 3 files changed, 210 insertions(+), 199 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0b1ecc002ae25..8ae236850b982 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+  case Intrinsic::abs:
 return true;
   default:
 return false;
@@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
   if (SLT == MVT::f64)
 return LT.first * NElts * get64BitInstrCost(CostKind);
 
-  if ((ST->has16BitInsts() && SLT == MVT::f16) ||
+  if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
 
@@ -737,10 +738,16 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes ,
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+// TODO: Full rate for i32/i16
 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
 if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
   NElts = 1;
 break;
+  case Intrinsic::abs:
+// Expansion takes 2 instructions for VALU
+if (SLT == MVT::i16 || SLT == MVT::i32)
+  InstRate = 2 * getFullRateInstrCost();
+break;
   }
 
   return LT.first * NElts * InstRate;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index f65615b07abc0..b86e99558377b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -14,116 +14,116 @@ define void @abs_nonpoison() {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100523
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100523

>From 85c14e04d3e27c8609fac2890eb475963d7f008b Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h   | 32 +
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 40 +++---
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index f65615b07abc0..e290f0631ff16 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -24,11 +24,11 @@ define void @abs_nonpoison() {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 
= call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 
= call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100520

>From 39ca2c43676bf82f97f8cce2e09091e7d849dfab Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:31:04 +0400
Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100519

>From 5a2e8acf2b7e4aafae237a035f81557d97948a29 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  67 +-
 .../Analysis/CostModel/X86/arith-overflow.ll  | 120 +-
 2 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase<T> {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase<T> {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index c5da46af04367..28d53042d4c21 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x 
i16> undef, <16 x i16> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 148 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100514
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Fix special casing vectorization costs of saturating add/sub (PR #97463)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/97463
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100522

>From df2b6b7c749629f0ea50f7772329b48ba9450f2f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:47:03 +0400
Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics

---
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 310 +
 1 file changed, 310 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
new file mode 100644
index 0..f65615b07abc0
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -0,0 +1,310 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SLOW-SIZE %s
+; END.
+
+define void @abs_nonpoison() {
+; FAST-LABEL: 'abs_nonpoison'
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 
= call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: 
%V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = 
call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 
= call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: 
%V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: 
%V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: 
%V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = 
call i8 @llvm.abs.i8(i8 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I8 
= call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I8 
= call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100522
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100522

>From df2b6b7c749629f0ea50f7772329b48ba9450f2f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:47:03 +0400
Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics

---
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 310 +
 1 file changed, 310 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
new file mode 100644
index 0..f65615b07abc0
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -0,0 +1,310 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SLOW-SIZE %s
+; END.
+
+define void @abs_nonpoison() {
+; FAST-LABEL: 'abs_nonpoison'
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 
= call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: 
%V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = 
call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 
= call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: 
%V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: 
%V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: 
%V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = 
call i8 @llvm.abs.i8(i8 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I8 
= call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I8 
= call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 

[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

ping 

https://github.com/llvm/llvm-project/pull/96760
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100523
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100522
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100521
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100520
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100519
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100518
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#100523** https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/> 
* **#100522** https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100521** https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100520** https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100519** https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100518** https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100514** https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100513** https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#97463** https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. 
[Learn more about stacking](https://stacking.dev/?utm_source=stack-comment).


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment;>https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/100523
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100523

None

>From ca78bfb62816c21172101c1f00dcead3efc472dc Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h   | 32 +
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 40 +++---
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase<T> {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase<T> {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index 133b95609bc15..623e02eb8239d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 
= call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 
= call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for 

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#100522** https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/> 
* **#100521** https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100520** https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100519** https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100518** https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100514** https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100513** https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#97463** https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. 
[Learn more about stacking](https://stacking.dev/?utm_source=stack-comment).


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment;>https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/100522
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#100521** https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/> 
* **#100520** https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100519** https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100518** https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100514** https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100513** https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#97463** https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. 
[Learn more about stacking](https://stacking.dev/?utm_source=stack-comment).


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment;>https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/100521
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#100521** https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100520** https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/> 
* **#100519** https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100518** https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100514** https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#100513** https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* **#97463** https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon;
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. 
[Learn more about stacking](https://stacking.dev/?utm_source=stack-comment).


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment;>https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/100520
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100522

None

>From 330c0e2bf40cf96b1c7778636fa739cb0c1a1f11 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:47:03 +0400
Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics

---
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 341 +
 1 file changed, 341 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
new file mode 100644
index 0..133b95609bc15
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
+; RUN: opt -passes="print" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -passes="print" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SLOW-SIZE %s
+; END.
+
+declare i64@llvm.abs.i64(i64, i1 immarg)
+declare <2 x i64>  @llvm.abs.v2i64(<2 x i64>, i1 immarg)
+declare <4 x i64>  @llvm.abs.v4i64(<4 x i64>, i1 immarg)
+declare <5 x i64>  @llvm.abs.v5i64(<5 x i64>, i1 immarg)
+declare <8 x i64>  @llvm.abs.v8i64(<8 x i64>, i1 immarg)
+
+declare i32@llvm.abs.i32(i32, i1 immarg)
+declare <2 x i32>  @llvm.abs.v2i32(<2 x i32>, i1 immarg)
+declare <4 x i32>  @llvm.abs.v4i32(<4 x i32>, i1 immarg)
+declare <8 x i32>  @llvm.abs.v8i32(<8 x i32>, i1 immarg)
+declare <9 x i32>  @llvm.abs.v9i32(<9 x i32>, i1 immarg)
+declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg)
+
+declare i16@llvm.abs.i16(i16, i1 immarg)
+declare <2 x i16>  @llvm.abs.v2i16(<2 x i16>, i1 immarg)
+declare <4 x i16>  @llvm.abs.v4i16(<4 x i16>, i1 immarg)
+declare <8 x i16>  @llvm.abs.v8i16(<8 x i16>, i1 immarg)
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg)
+declare <17 x i16> @llvm.abs.v17i16(<17 x i16>, i1 immarg)
+declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1 immarg)
+
+declare i8 @llvm.abs.i8(i8, i1 immarg)
+declare <2 x i8>   @llvm.abs.v2i8(<2 x i8>, i1 immarg)
+declare <4 x i8>   @llvm.abs.v4i8(<4 x i8>, i1 immarg)
+declare <8 x i8>   @llvm.abs.v8i8(<8 x i8>, i1 immarg)
+declare <16 x i8>  @llvm.abs.v16i8(<16 x i8>, i1 immarg)
+declare <32 x i8>  @llvm.abs.v32i8(<32 x i8>, i1 immarg)
+declare <33 x i8>  @llvm.abs.v33i8(<33 x i8>, i1 immarg)
+declare <64 x i8>  @llvm.abs.v64i8(<64 x i8>, i1 immarg)
+
+define i32 @abs_nonpoison(i32 %arg) {
+; FAST-LABEL: 'abs_nonpoison'
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 
= call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: 
%V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = 
call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 
= call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: 
%V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: 
%V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: 
%V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100518
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100519
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100520

None

>From 689ea8720d60ae6fc1226b929f5333adae1ce77c Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:31:04 +0400
Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100519

None

>From c98dcbf907a6b5d085b89f06d49ee8a3bc3e9dd2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  67 +-
 .../Analysis/CostModel/X86/arith-overflow.ll  | 120 +-
 .../CostModel/X86/intrinsic-cost-kinds.ll |   6 +-
 3 files changed, 99 insertions(+), 94 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index ba745262d1890..2d907d87b057c 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x 
i16> undef, <16 x i16> 

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100513

>From 80b236530103a66b8939aeb26f1d5c2be9043b5c Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 2 Jul 2024 21:28:30 +0200
Subject: [PATCH] AMDGPU: Add baseline test for vectorize of integer min/max

---
 .../SLPVectorizer/AMDGPU/min_max.ll   | 366 ++
 1 file changed, 366 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll 
b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
new file mode 100644
index 0..47b0dbd6b2cff
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @uadd_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @uadd_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @uadd_sat_v2i16(
+; GFX9-NEXT:  bb:
+; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT:ret <2 x i16> [[TMP0]]
+;
+bb:
+  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
+  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
+  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
+  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+  ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @usub_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @usub_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @usub_sat_v2i16(
+; GFX9-NEXT:  bb:
+; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT:ret <2 x i16> [[TMP0]]
+;
+bb:
+  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
+  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+  %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0)
+  %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
+  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+  ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @sadd_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100514
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100514
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100513
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100514
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100513
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100513

None

>From 7a8f09d99fa0a90fc7fe442d87103e66ea2ff806 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 2 Jul 2024 21:28:30 +0200
Subject: [PATCH] AMDGPU: Add baseline test for vectorize of integer min/max

---
 .../SLPVectorizer/AMDGPU/min_max.ll   | 366 ++
 1 file changed, 366 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll 
b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
new file mode 100644
index 0..47b0dbd6b2cff
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @uadd_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @uadd_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @uadd_sat_v2i16(
+; GFX9-NEXT:  bb:
+; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT:ret <2 x i16> [[TMP0]]
+;
+bb:
+  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
+  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
+  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
+  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+  ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @usub_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @usub_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @usub_sat_v2i16(
+; GFX9-NEXT:  bb:
+; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT:ret <2 x i16> [[TMP0]]
+;
+bb:
+  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
+  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+  %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0)
+  %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
+  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+  ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @sadd_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = 

[llvm-branch-commits] [llvm] CodeGen: Move current call site out of MachineModuleInfo (PR #100369)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100369

>From 4f1d8d439c2c0ff5742a98f8fe42d8212d91f556 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 17:00:00 +0400
Subject: [PATCH 1/2] CodeGen: Move current call site out of MachineModuleInfo

I do not understand what this is for, but it's only used in
SelectionDAGBuilder, so move it to FunctionLoweringInfo like other
function scope DAG builder state. The intrinsics are not documented
in the LangRef or Intrinsics.td.

This removes the last piece of codegen state from MachineModuleInfo.
---
 .../llvm/CodeGen/FunctionLoweringInfo.h   | 17 +
 llvm/include/llvm/CodeGen/MachineModuleInfo.h | 24 ---
 llvm/lib/CodeGen/MachineModuleInfo.cpp|  2 --
 .../SelectionDAG/SelectionDAGBuilder.cpp  | 10 
 4 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h 
b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 45a47d7333e35..fa75d883e451c 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -183,11 +183,28 @@ class FunctionLoweringInfo {
   std::vector > PHINodesToUpdate;
   unsigned OrigNumPHINodesToUpdate;
 
+  /// \name Exception Handling
+  /// \{
+
   /// If the current MBB is a landing pad, the exception pointer and exception
   /// selector registers are copied into these virtual registers by
   /// SelectionDAGISel::PrepareEHLandingPad().
   unsigned ExceptionPointerVirtReg, ExceptionSelectorVirtReg;
 
+  /// The current call site index being processed, if any. 0 if none.
+  unsigned CurCallSite = 0;
+  // TODO: Ideally, what we'd like is to have a switch that allows emitting
+  // synchronous (precise at call-sites only) CFA into .eh_frame. However,
+  // even under this switch, we'd like .debug_frame to be precise when using
+  // -g. At this moment, there's no way to specify that some CFI directives
+  // go into .eh_frame only, while others go into .debug_frame only.
+
+  /// Set the call site currently being processed.
+  void setCurrentCallSite(unsigned Site) { CurCallSite = Site; }
+
+  /// Get the call site currently being processed, if any. Return zero if none.
+  unsigned getCurrentCallSite() { return CurCallSite; }
+
   /// Collection of dbg.declare instructions handled after argument
   /// lowering and before ISel proper.
   SmallPtrSet PreprocessedDbgDeclares;
diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h 
b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index f69be67ee9f17..310cc4b2abb77 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -99,20 +99,6 @@ class MachineModuleInfo {
   /// want.
   MachineModuleInfoImpl *ObjFileMMI;
 
-  /// \name Exception Handling
-  /// \{
-
-  /// The current call site index being processed, if any. 0 if none.
-  unsigned CurCallSite = 0;
-
-  /// \}
-
-  // TODO: Ideally, what we'd like is to have a switch that allows emitting
-  // synchronous (precise at call-sites only) CFA into .eh_frame. However,
-  // even under this switch, we'd like .debug_frame to be precise when using
-  // -g. At this moment, there's no way to specify that some CFI directives
-  // go into .eh_frame only, while others go into .debug_frame only.
-
   /// Maps IR Functions to their corresponding MachineFunctions.
   DenseMap> MachineFunctions;
   /// Next unique number available for a MachineFunction.
@@ -179,16 +165,6 @@ class MachineModuleInfo {
 return const_cast(this)->getObjFileInfo();
   }
 
-  /// \name Exception Handling
-  /// \{
-
-  /// Set the call site currently being processed.
-  void setCurrentCallSite(unsigned Site) { CurCallSite = Site; }
-
-  /// Get the call site currently being processed, if any.  return zero if
-  /// none.
-  unsigned getCurrentCallSite() { return CurCallSite; }
-
   /// \}
 }; // End class MachineModuleInfo
 
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp 
b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 23de726a2ab97..26b38ceec393c 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -36,7 +36,6 @@ MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
 
 void MachineModuleInfo::initialize() {
   ObjFileMMI = nullptr;
-  CurCallSite = 0;
   NextFnNum = 0;
 }
 
@@ -55,7 +54,6 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &)
   MachineFunctions(std::move(MMI.MachineFunctions)) {
   Context.setObjectFileInfo(TM.getObjFileLowering());
   ObjFileMMI = MMI.ObjFileMMI;
-  CurCallSite = MMI.CurCallSite;
   ExternalContext = MMI.ExternalContext;
   TheModule = MMI.TheModule;
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 57a483a5a57ce..c554c0f5b6fd7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ 

[llvm-branch-commits] [llvm] CodeGen: Remove UsesMSVCFloatingPoint from MachineModuleInfo (PR #100368)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100368

>From 8991fa261a7705f99ac5729b6bbb1cfeb53e1263 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 17 Apr 2022 10:28:14 -0400
Subject: [PATCH 1/2] CodeGen: Remove UsesMSVCFloatingPoint from
 MachineModuleInfo

This is only used by x86 and only used in the AsmPrinter module pass. I
think implementing this by looking at the underlying IR types instead
of the selected instructions is a pretty horrifying implementation,
but it's still available in the AsmPrinter.

This is https://reviews.llvm.org/D123933 resurrected.

I still don't know what the point of emitting _fltused is, but this
approach of looking at the IR types probably isn't the right way to
do this in the first place. If the intent is to report any FP instructions,
this will miss any implicitly introduced ones during codegen. I also don't
know why just unconditionally emitting it isn't an option.

The last review mentioned the ARMs might want to emit this, but I'm
not going to go fix that. If someone wants to emit this on ARM, they
can move this to a common helper or analysis somewhere.
---
 llvm/include/llvm/CodeGen/MachineModuleInfo.h |  8 --
 llvm/lib/CodeGen/MachineModuleInfo.cpp|  1 -
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 27 ---
 llvm/lib/Target/X86/X86AsmPrinter.cpp | 25 -
 4 files changed, 24 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h 
b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index b39db93b021b5..f69be67ee9f17 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -113,10 +113,6 @@ class MachineModuleInfo {
   // -g. At this moment, there's no way to specify that some CFI directives
   // go into .eh_frame only, while others go into .debug_frame only.
 
-  /// True if this module is being built for windows/msvc, and uses floating
-  /// point.  This is used to emit an undefined reference to _fltused.
-  bool UsesMSVCFloatingPoint = false;
-
   /// Maps IR Functions to their corresponding MachineFunctions.
   DenseMap> MachineFunctions;
   /// Next unique number available for a MachineFunction.
@@ -183,10 +179,6 @@ class MachineModuleInfo {
 return const_cast(this)->getObjFileInfo();
   }
 
-  bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; }
-
-  void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; }
-
   /// \name Exception Handling
   /// \{
 
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp 
b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 12dec288b3ce2..23de726a2ab97 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -38,7 +38,6 @@ void MachineModuleInfo::initialize() {
   ObjFileMMI = nullptr;
   CurCallSite = 0;
   NextFnNum = 0;
-  UsesMSVCFloatingPoint = false;
 }
 
 void MachineModuleInfo::finalize() {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 401d23b22adcd..84331d257a3d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -417,30 +417,6 @@ void 
SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage ) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
- MachineModuleInfo &MMI) {
-  // Only needed for MSVC
-  if (!TT.isWindowsMSVCEnvironment())
-return;
-
-  // If it's already set, nothing to do.
-  if (MMI.usesMSVCFloatingPoint())
-return;
-
-  for (const Instruction &I : instructions(F)) {
-if (I.getType()->isFPOrFPVectorTy()) {
-  MMI.setUsesMSVCFloatingPoint(true);
-  return;
-}
-for (const auto &Op : I.operands()) {
-  if (Op->getType()->isFPOrFPVectorTy()) {
-MMI.setUsesMSVCFloatingPoint(true);
-return;
-  }
-}
-  }
-}
-
 PreservedAnalyses
 SelectionDAGISelPass::run(MachineFunction ,
   MachineFunctionAnalysisManager ) {
@@ -802,9 +778,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction 
) {
 }
   }
 
-  // Determine if floating point is used for msvc
-  computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, *CurDAG->getMMI());
-
   // Release function-specific state. SDB and CurDAG are already cleared
   // at this point.
   FuncInfo->clear();
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp 
b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 0c2c6bf7f8b70..9d86a9c9d1609 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGenTypes/MachineValueType.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
@@ -975,6 

[llvm-branch-commits] [llvm] CodeGen: Remove MachineModuleInfo reference from MachineFunction (PR #100357)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100357

>From 8aff4bdfa99b782379a5383af548c4250605ed63 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sat, 20 Jul 2024 14:24:23 +0400
Subject: [PATCH 1/3] CodeGen: Remove MachineModuleInfo reference from
 MachineFunction

This avoids another unserializable field. Move the DbgInfoAvailable
field into the AsmPrinter, which is really only a cache/convenience
bit for a direct IR module metadata check.
---
 llvm/include/llvm/CodeGen/AsmPrinter.h |  6 ++
 llvm/include/llvm/CodeGen/MachineFunction.h| 18 --
 llvm/include/llvm/CodeGen/MachineModuleInfo.h  |  6 --
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 17 -
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp  |  4 ++--
 .../CodeGen/AsmPrinter/DebugHandlerBase.cpp|  4 ++--
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp |  2 +-
 llvm/lib/CodeGen/MachineFunction.cpp   | 12 ++--
 llvm/lib/CodeGen/MachineFunctionAnalysis.cpp   |  2 +-
 llvm/lib/CodeGen/MachineModuleInfo.cpp |  5 +
 .../SelectionDAG/SelectionDAGBuilder.cpp   |  6 +++---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp  |  4 +---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp  | 12 ++--
 llvm/tools/llvm-reduce/ReducerWorkItem.cpp |  2 +-
 .../CodeGen/AArch64SelectionDAGTest.cpp|  4 ++--
 llvm/unittests/CodeGen/InstrRefLDVTest.cpp |  2 +-
 llvm/unittests/CodeGen/MFCommon.inc|  3 ++-
 .../SelectionDAGAddressAnalysisTest.cpp|  2 +-
 .../CodeGen/SelectionDAGPatternMatchTest.cpp   |  2 +-
 .../AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp   |  3 ++-
 llvm/unittests/Target/AMDGPU/PALMetadata.cpp   |  2 +-
 .../Target/RISCV/RISCVInstrInfoTest.cpp|  2 +-
 22 files changed, 57 insertions(+), 63 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h 
b/llvm/include/llvm/CodeGen/AsmPrinter.h
index f57be39076a78..36d1b47973870 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -225,6 +225,9 @@ class AsmPrinter : public MachineFunctionPass {
   /// split stack prologue.
   bool HasNoSplitStack = false;
 
+  /// True if debugging information is available in this module.
+  bool DbgInfoAvailable = false;
+
 protected:
   explicit AsmPrinter(TargetMachine , std::unique_ptr Streamer);
 
@@ -430,6 +433,9 @@ class AsmPrinter : public MachineFunctionPass {
   /// Get the CFISection type for the module.
   CFISection getModuleCFISectionType() const { return ModuleCFISection; }
 
+  /// Returns true if valid debug info is present.
+  bool hasDebugInfo() const { return DbgInfoAvailable; }
+
   bool needsSEHMoves();
 
   /// Since emitting CFI unwind information is entangled with supporting the
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h 
b/llvm/include/llvm/CodeGen/MachineFunction.h
index 6e7292abeddbb..142570b9ce551 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -260,7 +260,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
   const LLVMTargetMachine 
   const TargetSubtargetInfo *STI;
   MCContext 
-  MachineModuleInfo 
 
   // RegInfo - Information about each register in use in the function.
   MachineRegisterInfo *RegInfo;
@@ -395,15 +394,15 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
 
   /// \}
 
-  /// Clear all the members of this MachineFunction, but the ones used
-  /// to initialize again the MachineFunction.
-  /// More specifically, this deallocates all the dynamically allocated
-  /// objects and get rid of all the XXXInfo data structure, but keep
-  /// unchanged the references to Fn, Target, MMI, and FunctionNumber.
+  /// Clear all the members of this MachineFunction, but the ones used to
+  /// initialize again the MachineFunction.  More specifically, this 
deallocates
+  /// all the dynamically allocated objects and get rid of all the XXXInfo data
+  /// structure, but keep unchanged the references to Fn, Target, and
+  /// FunctionNumber.
   void clear();
   /// Allocate and initialize the different members.
   /// In particular, the XXXInfo data structure.
-  /// \pre Fn, Target, MMI, and FunctionNumber are properly set.
+  /// \pre Fn, Target, and FunctionNumber are properly set.
   void init();
 
 public:
@@ -632,8 +631,8 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
   const static unsigned int DebugOperandMemNumber;
 
   MachineFunction(Function , const LLVMTargetMachine ,
-  const TargetSubtargetInfo , unsigned FunctionNum,
-  MachineModuleInfo );
+  const TargetSubtargetInfo , MCContext ,
+  unsigned FunctionNum);
   MachineFunction(const MachineFunction &) = delete;
   MachineFunction =(const MachineFunction &) = delete;
   ~MachineFunction();
@@ -665,7 +664,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
 
   GISelChangeObserver *getObserver() const { 

[llvm-branch-commits] [llvm] FastISel: Do not check for module debug info (PR #100353)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100353

>From 708970d494353c8a2e5dcf66fb4fc0554132d518 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 15:53:01 +0400
Subject: [PATCH] FastISel: Do not check for module debug info

I don't see the point of this check and SelectionDAG
does not perform it. In the normal usecase, if there's
no debug info the debug intrinsics would not be there
in the first place.
---
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 16 
 1 file changed, 16 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ef9f783355190..e255bbaa92b16 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1203,11 +1203,6 @@ void FastISel::handleDbgInfo(const Instruction *II) {
 
 if (DbgLabelRecord *DLR = dyn_cast()) {
   assert(DLR->getLabel() && "Missing label");
-  if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
-LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n");
-continue;
-  }
-
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(),
   TII.get(TargetOpcode::DBG_LABEL))
   .addMetadata(DLR->getLabel());
@@ -1402,12 +1397,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst 
*II) {
   case Intrinsic::dbg_declare: {
 const DbgDeclareInst *DI = cast(II);
 assert(DI->getVariable() && "Missing variable");
-if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
-  LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
-<< " (!hasDebugInfo)\n");
-  return true;
-}
-
 if (FuncInfo.PreprocessedDbgDeclares.contains(DI))
   return true;
 
@@ -1446,11 +1435,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst 
*II) {
   case Intrinsic::dbg_label: {
 const DbgLabelInst *DI = cast(II);
 assert(DI->getLabel() && "Missing label");
-if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
-  LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
-  return true;
-}
-
 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
 TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
 return true;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100333

>From 0d0fe0051af3a7a4c11195042d85c8a5caae80b5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 13:11:04 +0400
Subject: [PATCH] DebugInfo: Avoid some MMI::hasDebugInfo checks

I assume getSubprogram will do the correct thing in hasDebugInfo,
and this is redundant with the debug_compile_units distance check.
This is in preparation for removing the field.
---
 llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++--
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp   | 7 ---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 6c70c47de8822..ed99eb3c459e5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
  Ty->getTag() == dwarf::DW_TAG_unspecified_type;
 }
 
-static bool hasDebugInfo(const MachineModuleInfo *MMI,
- const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo())
-return false;
+static bool hasDebugInfo(const MachineFunction *MF) {
   auto *SP = MF->getFunction().getSubprogram();
   if (!SP)
 return false;
@@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI,
 void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
   PrevInstBB = nullptr;
 
-  if (!Asm || !hasDebugInfo(MMI, MF)) {
+  if (!Asm || !hasDebugInfo(MF)) {
 skippedNonDebugFunction();
 return;
   }
@@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() {
 }
 
 void DebugHandlerBase::endFunction(const MachineFunction *MF) {
-  if (Asm && hasDebugInfo(MMI, MF))
+  if (Asm && hasDebugInfo(MF))
 endFunctionImpl(MF);
   DbgValues.clear();
   DbgLabels.clear();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5f1f315c5ab24..fbce7e92b7781 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1148,14 +1148,15 @@ 
sortGlobalExprs(SmallVectorImpl ) {
 void DwarfDebug::beginModule(Module *M) {
   DebugHandlerBase::beginModule(M);
 
-  if (!Asm || !MMI->hasDebugInfo())
+  if (!Asm)
 return;
 
   unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
+  if (NumDebugCUs == 0)
+return;
+
   assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
-  assert(MMI->hasDebugInfo() &&
- "DebugInfoAvailabilty unexpectedly not initialized");
   SingleCU = NumDebugCUs == 1;
   DenseMap>
   GVMap;
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp 
b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0b654abd2814c..b4eba07afe7c5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() {
   // Emit initial .loc debug directive for correct relocation symbol data.
   if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
 assert(SP->getUnit());
-if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo())
+if (!SP->getUnit()->isDebugDirectivesOnly())
   emitInitialRawDwarfLocDirective(*MF);
   }
 }
@@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module , raw_ostream ,
 if (HasFullDebugInfo)
   break;
   }
-  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
+  if (HasFullDebugInfo)
 O << ", debug";
 
   O << "\n";

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] CodeGen: Remove UsesMSVCFloatingPoint from MachineModuleInfo (PR #100368)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> `_fltused` is used to identify the need for supporting functions and is used 
> during the linking process. Always emitting results in code size increases 
> and overlinking, but catching the generated ones is definitely something that 
> we should be doing.

So really it should be looking for post-legalize calls to specific runtime 
libcalls

https://github.com/llvm/llvm-project/pull/100368
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100380

>From 1b48c68a9130ebb9fb7a68752be79716fe075dad Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 1 Feb 2023 09:52:34 -0400
Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp

InstCombine also should have taken care of this, but this
should be helpful when the fcmp based lowering strategy tries
to combine multiple tests.
---
 llvm/lib/CodeGen/CodeGenCommonISel.cpp|   2 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  35 -
 llvm/test/CodeGen/AArch64/isinf.ll|  22 ++-
 llvm/test/CodeGen/PowerPC/fp-classify.ll  |  32 ++--
 llvm/test/CodeGen/X86/is_fpclass-fp80.ll  |  52 +++
 llvm/test/CodeGen/X86/is_fpclass.ll   | 137 +-
 6 files changed, 159 insertions(+), 121 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp 
b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index f5207d8b9d124..bb09b0d1140fc 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest 
Test, bool UseFP) {
   case fcSubnormal | fcZero | fcNan:
 return InvertedTest;
   case fcInf | fcNan:
+  case fcPosInf | fcNan:
+  case fcNegInf | fcNan:
 // If we're trying to use fcmp, we can take advantage of the nan check
 // behavior of the compare (but this is more instructions in the integer
 // expansion).
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ba7c89a33f604..0036c182ab9db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8599,6 +8599,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : 
ISD::SETUEQ;
 
+// See if we can fold an | fcNan into an unordered compare.
+FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
+
+// Can't fold the ordered check if we're only testing for snan or qnan
+// individually.
+if ((FPTestMask & fcNan) != fcNan)
+  OrderedFPTestMask = FPTestMask;
+
+const bool IsOrdered = FPTestMask == OrderedFPTestMask;
+
 if (std::optional IsCmp0 =
 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
 IsCmp0 && (isCondCodeLegalOrCustom(
@@ -8618,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
   return DAG.getSetCC(DL, ResultVT, Op, Op,
   IsInvertedFP ? ISD::SETO : ISD::SETUO);
 
-bool IsOrderedInf = FPTestMask == fcInf;
-if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
-isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
- : UnorderedCmpOpcode,
-OperandVT.getScalarType().getSimpleVT()) &&
-isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+if (OrderedFPTestMask == fcInf &&
+isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
+  : UnorderedCmpOpcode,
+OperandVT.getScalarType().getSimpleVT())) {
   // isinf(x) --> fabs(x) == inf
   SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
   SDValue Inf =
   DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
   return DAG.getSetCC(DL, ResultVT, Abs, Inf,
-  IsOrderedInf ? OrderedCmpOpcode : 
UnorderedCmpOpcode);
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
+}
+
+if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) {
+  // isposinf(x) --> x == inf
+  // isneginf(x) --> x == -inf
+  // isposinf(x) || nan --> x u== inf
+  // isneginf(x) || nan --> x u== -inf
+
+  SDValue Inf = DAG.getConstantFP(
+  APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
+  OperandVT);
+  return DAG.getSetCC(DL, ResultVT, Op, Inf,
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
 }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll 
b/llvm/test/CodeGen/AArch64/isinf.ll
index 834417b98743a..458bd7eeba16c 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,14 +58,22 @@ define i32 @replace_isinf_call_f64(double %x) {
 define i32 @replace_isinf_call_f128(fp128 %x) {
 ; CHECK-LABEL: replace_isinf_call_f128:
 ; CHECK:   // %bb.0:
-; CHECK-NEXT:str q0, [sp, #-16]!
-; CHECK-NEXT:.cfi_def_cfa_offset 16
-; CHECK-NEXT:ldp x9, x8, [sp], #16
-; CHECK-NEXT:and x8, x8, #0x7fff
-; CHECK-NEXT:eor x8, x8, #0x7fff
-; CHECK-NEXT:orr x8, x9, x8
-; CHECK-NEXT:cmp x8, #0
+; 

[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100380

>From 37ecc7b70321cdd7ed369d4bec6db50b3f112537 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 1 Feb 2023 09:52:34 -0400
Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp

InstCombine also should have taken care of this, but this
should be helpful when the fcmp based lowering strategy tries
to combine multiple tests.
---
 llvm/lib/CodeGen/CodeGenCommonISel.cpp|   2 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  35 -
 llvm/test/CodeGen/PowerPC/fp-classify.ll  |  32 ++--
 llvm/test/CodeGen/X86/is_fpclass-fp80.ll  |  52 +++
 llvm/test/CodeGen/X86/is_fpclass.ll   | 137 +-
 5 files changed, 144 insertions(+), 114 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp 
b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index f5207d8b9d124..bb09b0d1140fc 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest 
Test, bool UseFP) {
   case fcSubnormal | fcZero | fcNan:
 return InvertedTest;
   case fcInf | fcNan:
+  case fcPosInf | fcNan:
+  case fcNegInf | fcNan:
 // If we're trying to use fcmp, we can take advantage of the nan check
 // behavior of the compare (but this is more instructions in the integer
 // expansion).
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ba7c89a33f604..0036c182ab9db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8599,6 +8599,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : 
ISD::SETUEQ;
 
+// See if we can fold an | fcNan into an unordered compare.
+FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
+
+// Can't fold the ordered check if we're only testing for snan or qnan
+// individually.
+if ((FPTestMask & fcNan) != fcNan)
+  OrderedFPTestMask = FPTestMask;
+
+const bool IsOrdered = FPTestMask == OrderedFPTestMask;
+
 if (std::optional IsCmp0 =
 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
 IsCmp0 && (isCondCodeLegalOrCustom(
@@ -8618,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
   return DAG.getSetCC(DL, ResultVT, Op, Op,
   IsInvertedFP ? ISD::SETO : ISD::SETUO);
 
-bool IsOrderedInf = FPTestMask == fcInf;
-if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
-isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
- : UnorderedCmpOpcode,
-OperandVT.getScalarType().getSimpleVT()) &&
-isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+if (OrderedFPTestMask == fcInf &&
+isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
+  : UnorderedCmpOpcode,
+OperandVT.getScalarType().getSimpleVT())) {
   // isinf(x) --> fabs(x) == inf
   SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
   SDValue Inf =
   DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
   return DAG.getSetCC(DL, ResultVT, Abs, Inf,
-  IsOrderedInf ? OrderedCmpOpcode : 
UnorderedCmpOpcode);
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
+}
+
+if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) {
+  // isposinf(x) --> x == inf
+  // isneginf(x) --> x == -inf
+  // isposinf(x) || nan --> x u== inf
+  // isneginf(x) || nan --> x u== -inf
+
+  SDValue Inf = DAG.getConstantFP(
+  APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
+  OperandVT);
+  return DAG.getSetCC(DL, ResultVT, Op, Inf,
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
 }
   }
 
diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll 
b/llvm/test/CodeGen/PowerPC/fp-classify.ll
index f527b3c48040e..50873f29b2936 100644
--- a/llvm/test/CodeGen/PowerPC/fp-classify.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -57,18 +57,30 @@ entry:
 define zeroext i1 @abs_isinfq(fp128 %x) {
 ; P8-LABEL: abs_isinfq:
 ; P8:   # %bb.0: # %entry
+; P8-NEXT:mflr 0
+; P8-NEXT:stdu 1, -48(1)
+; P8-NEXT:std 0, 64(1)
+; P8-NEXT:.cfi_def_cfa_offset 48
+; P8-NEXT:.cfi_offset lr, 16
 ; P8-NEXT:xxswapd 0, 34
-; P8-NEXT:addi 3, 1, -16
-; P8-NEXT:li 5, 32767
+; P8-NEXT:addi 3, 1, 32
 ; P8-NEXT:stxvd2x 0, 0, 3
-; P8-NEXT:rldic 5, 5, 48, 1
-; P8-NEXT:ld 4, -8(1)
-; P8-NEXT:

[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100380?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100380** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100380?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100378** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100378?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


 Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100380
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100380

InstCombine also should have taken care of this, but this
should be helpful when the fcmp based lowering strategy tries
to combine multiple tests.

>From c27e0441cacf32077d0c101304a0b0b3d336058c Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 1 Feb 2023 09:52:34 -0400
Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp

InstCombine also should have taken care of this, but this
should be helpful when the fcmp based lowering strategy tries
to combine multiple tests.
---
 llvm/lib/CodeGen/CodeGenCommonISel.cpp|   2 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  35 -
 llvm/test/CodeGen/X86/is_fpclass-fp80.ll  |  52 +++
 llvm/test/CodeGen/X86/is_fpclass.ll   | 137 +-
 4 files changed, 122 insertions(+), 104 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp 
b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index f5207d8b9d124..bb09b0d1140fc 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest 
Test, bool UseFP) {
   case fcSubnormal | fcZero | fcNan:
 return InvertedTest;
   case fcInf | fcNan:
+  case fcPosInf | fcNan:
+  case fcNegInf | fcNan:
 // If we're trying to use fcmp, we can take advantage of the nan check
 // behavior of the compare (but this is more instructions in the integer
 // expansion).
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ba7c89a33f604..0036c182ab9db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8599,6 +8599,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : 
ISD::SETUEQ;
 
+// See if we can fold an | fcNan into an unordered compare.
+FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
+
+// Can't fold the ordered check if we're only testing for snan or qnan
+// individually.
+if ((FPTestMask & fcNan) != fcNan)
+  OrderedFPTestMask = FPTestMask;
+
+const bool IsOrdered = FPTestMask == OrderedFPTestMask;
+
 if (std::optional<bool> IsCmp0 =
 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
 IsCmp0 && (isCondCodeLegalOrCustom(
@@ -8618,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op,
   return DAG.getSetCC(DL, ResultVT, Op, Op,
   IsInvertedFP ? ISD::SETO : ISD::SETUO);
 
-bool IsOrderedInf = FPTestMask == fcInf;
-if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
-isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
- : UnorderedCmpOpcode,
-OperandVT.getScalarType().getSimpleVT()) &&
-isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+if (OrderedFPTestMask == fcInf &&
+isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
+  : UnorderedCmpOpcode,
+OperandVT.getScalarType().getSimpleVT())) {
   // isinf(x) --> fabs(x) == inf
   SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
   SDValue Inf =
   DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
   return DAG.getSetCC(DL, ResultVT, Abs, Inf,
-  IsOrderedInf ? OrderedCmpOpcode : 
UnorderedCmpOpcode);
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
+}
+
+if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) {
+  // isposinf(x) --> x == inf
+  // isneginf(x) --> x == -inf
+  // isposinf(x) || nan --> x u== inf
+  // isneginf(x) || nan --> x u== -inf
+
+  SDValue Inf = DAG.getConstantFP(
+  APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
+  OperandVT);
+  return DAG.getSetCC(DL, ResultVT, Op, Inf,
+  IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
 }
   }
 
diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll 
b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
index 52d294ca01720..56d3ba7cd7b2b 100644
--- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
@@ -265,23 +265,24 @@ entry:
 define i1 @is_posinf_f80(x86_fp80 %x) nounwind {
 ; X86-LABEL: is_posinf_f80:
 ; X86:   # %bb.0: # %entry
-; X86-NEXT:movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:movl $-2147483648, %ecx # imm = 0x8000
-; X86-NEXT:xorl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:xorl $32767, %eax # imm = 0x7FFF
-; X86-NEXT:orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:orl %ecx, 

[llvm-branch-commits] [llvm] FastISel: Do not check for module debug info (PR #100353)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100353
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] FastISel: Do not check for module debug info (PR #100353)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100353?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100353** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100353?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100333** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100333?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100319** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100319?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


 Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100353
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] FastISel: Do not check for module debug info (PR #100353)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100353

I don't see the point of this check and SelectionDAG
does not perform it. In the normal use case, if there's
no debug info the debug intrinsics would not be there
in the first place.

>From 1e3fdb2a58c6709e4d69fd0facdfdec6916802be Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 15:53:01 +0400
Subject: [PATCH] FastISel: Do not check for module debug info

I don't see the point of this check and SelectionDAG
does not perform it. In the normal use case, if there's
no debug info the debug intrinsics would not be there
in the first place.
---
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 16 
 1 file changed, 16 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ef9f783355190..e255bbaa92b16 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1203,11 +1203,6 @@ void FastISel::handleDbgInfo(const Instruction *II) {
 
 if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
   assert(DLR->getLabel() && "Missing label");
-  if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
-LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n");
-continue;
-  }
-
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(),
   TII.get(TargetOpcode::DBG_LABEL))
   .addMetadata(DLR->getLabel());
@@ -1402,12 +1397,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst 
*II) {
   case Intrinsic::dbg_declare: {
 const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
 assert(DI->getVariable() && "Missing variable");
-if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
-  LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
-<< " (!hasDebugInfo)\n");
-  return true;
-}
-
 if (FuncInfo.PreprocessedDbgDeclares.contains(DI))
   return true;
 
@@ -1446,11 +1435,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst 
*II) {
   case Intrinsic::dbg_label: {
 const DbgLabelInst *DI = cast<DbgLabelInst>(II);
 assert(DI->getLabel() && "Missing label");
-if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
-  LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
-  return true;
-}
-
 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
 TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
 return true;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100333

>From 6367409181a47493058cede13de0b623d59e4b45 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 13:11:04 +0400
Subject: [PATCH] DebugInfo: Avoid some MMI::hasDebugInfo checks

I assume getSubprogram will do the correct thing in hasDebugInfo,
and this is redundant with the debug_compile_units distance check.
This is in preparation for removing the field.
---
 llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++--
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp   | 7 ---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 6c70c47de8822..ed99eb3c459e5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
  Ty->getTag() == dwarf::DW_TAG_unspecified_type;
 }
 
-static bool hasDebugInfo(const MachineModuleInfo *MMI,
- const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo())
-return false;
+static bool hasDebugInfo(const MachineFunction *MF) {
   auto *SP = MF->getFunction().getSubprogram();
   if (!SP)
 return false;
@@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI,
 void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
   PrevInstBB = nullptr;
 
-  if (!Asm || !hasDebugInfo(MMI, MF)) {
+  if (!Asm || !hasDebugInfo(MF)) {
 skippedNonDebugFunction();
 return;
   }
@@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() {
 }
 
 void DebugHandlerBase::endFunction(const MachineFunction *MF) {
-  if (Asm && hasDebugInfo(MMI, MF))
+  if (Asm && hasDebugInfo(MF))
 endFunctionImpl(MF);
   DbgValues.clear();
   DbgLabels.clear();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5f1f315c5ab24..fbce7e92b7781 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1148,14 +1148,15 @@ 
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
 void DwarfDebug::beginModule(Module *M) {
   DebugHandlerBase::beginModule(M);
 
-  if (!Asm || !MMI->hasDebugInfo())
+  if (!Asm)
 return;
 
   unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
+  if (NumDebugCUs == 0)
+return;
+
   assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
-  assert(MMI->hasDebugInfo() &&
- "DebugInfoAvailabilty unexpectedly not initialized");
   SingleCU = NumDebugCUs == 1;
   DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
   GVMap;
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp 
b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0b654abd2814c..b4eba07afe7c5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() {
   // Emit initial .loc debug directive for correct relocation symbol data.
   if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
 assert(SP->getUnit());
-if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo())
+if (!SP->getUnit()->isDebugDirectivesOnly())
   emitInitialRawDwarfLocDirective(*MF);
   }
 }
@@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
 if (HasFullDebugInfo)
   break;
   }
-  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
+  if (HasFullDebugInfo)
 O << ", debug";
 
   O << "\n";

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100333

>From 6367409181a47493058cede13de0b623d59e4b45 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 13:11:04 +0400
Subject: [PATCH 1/2] DebugInfo: Avoid some MMI::hasDebugInfo checks

I assume getSubprogram will do the correct thing in hasDebugInfo,
and this is redundant with the debug_compile_units distance check.
This is in preparation for removing the field.
---
 llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++--
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp   | 7 ---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 6c70c47de8822..ed99eb3c459e5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
  Ty->getTag() == dwarf::DW_TAG_unspecified_type;
 }
 
-static bool hasDebugInfo(const MachineModuleInfo *MMI,
- const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo())
-return false;
+static bool hasDebugInfo(const MachineFunction *MF) {
   auto *SP = MF->getFunction().getSubprogram();
   if (!SP)
 return false;
@@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI,
 void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
   PrevInstBB = nullptr;
 
-  if (!Asm || !hasDebugInfo(MMI, MF)) {
+  if (!Asm || !hasDebugInfo(MF)) {
 skippedNonDebugFunction();
 return;
   }
@@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() {
 }
 
 void DebugHandlerBase::endFunction(const MachineFunction *MF) {
-  if (Asm && hasDebugInfo(MMI, MF))
+  if (Asm && hasDebugInfo(MF))
 endFunctionImpl(MF);
   DbgValues.clear();
   DbgLabels.clear();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5f1f315c5ab24..fbce7e92b7781 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1148,14 +1148,15 @@ 
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
 void DwarfDebug::beginModule(Module *M) {
   DebugHandlerBase::beginModule(M);
 
-  if (!Asm || !MMI->hasDebugInfo())
+  if (!Asm)
 return;
 
   unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
+  if (NumDebugCUs == 0)
+return;
+
   assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
-  assert(MMI->hasDebugInfo() &&
- "DebugInfoAvailabilty unexpectedly not initialized");
   SingleCU = NumDebugCUs == 1;
   DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
   GVMap;
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp 
b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0b654abd2814c..b4eba07afe7c5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() {
   // Emit initial .loc debug directive for correct relocation symbol data.
   if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
 assert(SP->getUnit());
-if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo())
+if (!SP->getUnit()->isDebugDirectivesOnly())
   emitInitialRawDwarfLocDirective(*MF);
   }
 }
@@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
 if (HasFullDebugInfo)
   break;
   }
-  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
+  if (HasFullDebugInfo)
 O << ", debug";
 
   O << "\n";

>From 1e3fdb2a58c6709e4d69fd0facdfdec6916802be Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 15:53:01 +0400
Subject: [PATCH 2/2] FastISel: Do not check for module debug info

I don't see the point of this check and SelectionDAG
does not perform it. In the normal use case, if there's
no debug info the debug intrinsics would not be there
in the first place.
---
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 16 
 1 file changed, 16 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ef9f783355190..e255bbaa92b16 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1203,11 +1203,6 @@ void FastISel::handleDbgInfo(const Instruction *II) {
 
 if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
   assert(DLR->getLabel() && "Missing label");
-  if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
-LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n");
-continue;
-  }
-
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(),
   TII.get(TargetOpcode::DBG_LABEL))
   .addMetadata(DLR->getLabel());
@@ -1402,12 +1397,6 @@ bool 

[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)

2024-07-24 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100333

>From 442532e0d50039d0bb3603520d361b2ee4b4a1b5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 24 Jul 2024 13:11:04 +0400
Subject: [PATCH] DebugInfo: Avoid some MMI::hasDebugInfo checks

I assume getSubprogram will do the correct thing in hasDebugInfo,
and this is redundant with the debug_compile_units distance check.
This is in preparation for removing the field.
---
 llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++--
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp   | 7 ---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 6c70c47de8822..ed99eb3c459e5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
  Ty->getTag() == dwarf::DW_TAG_unspecified_type;
 }
 
-static bool hasDebugInfo(const MachineModuleInfo *MMI,
- const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo())
-return false;
+static bool hasDebugInfo(const MachineFunction *MF) {
   auto *SP = MF->getFunction().getSubprogram();
   if (!SP)
 return false;
@@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI,
 void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
   PrevInstBB = nullptr;
 
-  if (!Asm || !hasDebugInfo(MMI, MF)) {
+  if (!Asm || !hasDebugInfo(MF)) {
 skippedNonDebugFunction();
 return;
   }
@@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() {
 }
 
 void DebugHandlerBase::endFunction(const MachineFunction *MF) {
-  if (Asm && hasDebugInfo(MMI, MF))
+  if (Asm && hasDebugInfo(MF))
 endFunctionImpl(MF);
   DbgValues.clear();
   DbgLabels.clear();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5f1f315c5ab24..fbce7e92b7781 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1148,14 +1148,15 @@ 
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
 void DwarfDebug::beginModule(Module *M) {
   DebugHandlerBase::beginModule(M);
 
-  if (!Asm || !MMI->hasDebugInfo())
+  if (!Asm)
 return;
 
   unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
+  if (NumDebugCUs == 0)
+return;
+
   assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
-  assert(MMI->hasDebugInfo() &&
- "DebugInfoAvailabilty unexpectedly not initialized");
   SingleCU = NumDebugCUs == 1;
   DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
   GVMap;
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp 
b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0b654abd2814c..b4eba07afe7c5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() {
   // Emit initial .loc debug directive for correct relocation symbol data.
   if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
 assert(SP->getUnit());
-if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo())
+if (!SP->getUnit()->isDebugDirectivesOnly())
   emitInitialRawDwarfLocDirective(*MF);
   }
 }
@@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
 if (HasFullDebugInfo)
   break;
   }
-  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
+  if (HasFullDebugInfo)
 O << ", debug";
 
   O << "\n";

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


  1   2   3   4   5   >