[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/97050 >From 5672042d638e13794e09d981f286fef487b05206 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 27 Jun 2024 16:32:48 +0200 Subject: [PATCH] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics These are now fully covered by atomicrmw. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 - llvm/lib/IR/AutoUpgrade.cpp | 14 +- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 - .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 - .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - llvm/lib/Target/AMDGPU/FLATInstructions.td| 2 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- llvm/test/Bitcode/amdgcn-atomic.ll| 22 ++ .../AMDGPU/GlobalISel/fp-atomics-gfx940.ll| 106 - .../test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll | 218 -- llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll | 193 11 files changed, 33 insertions(+), 538 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 8c25467cc5e4b..e24571d8b184c 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2990,10 +2990,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic { def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic; } -// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. 
-def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; - defset list AMDGPUMFMAIntrinsics940 = { def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 53de9eef516b3..f566a0e3c3043 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1034,7 +1034,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *, } if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || - Name.starts_with("ds.fmax")) { + Name.starts_with("ds.fmax") || + Name.starts_with("global.atomic.fadd.v2bf16") || + Name.starts_with("flat.atomic.fadd.v2bf16")) { // Replaced with atomicrmw fadd/fmin/fmax, so there's no new // declaration. NewFn = nullptr; @@ -4042,7 +4044,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, .StartsWith("ds.fmin", AtomicRMWInst::FMin) .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) - .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); + .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) + .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) + .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd); unsigned NumOperands = CI->getNumOperands(); if (NumOperands < 3) // Malformed bitcode. 
@@ -4097,8 +4101,10 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); if (PtrTy->getAddressSpace() != 3) { -RMW->setMetadata("amdgpu.no.fine.grained.memory", - MDNode::get(F->getContext(), {})); +MDNode *EmptyMD = MDNode::get(F->getContext(), {}); +RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); +if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) + RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); } if (IsVolatile) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c6dbc58395e48..db8b44149cf47 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -620,12 +620,10 @@ multiclass local_addr_space_atomic_op { defm int_amdgcn_flat_atomic_fadd : noret_op; defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op; -defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_flat_atomic_fmin : noret_op; defm int_amdgcn_flat_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; -defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9a6ba5ac68084..5e4f9f4365be0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4896,8 +4896,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr ) const { case Intrinsic::amdgcn_flat_atomic_fmax: case
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins (PR #96876)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96876 >From 55fc7946a4480b2dd1befd579805623a56f5fd1a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 23:18:32 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 36 +-- .../builtins-fp-atomics-gfx90a.cl | 18 ++ 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3afe3135d99d6..b9f2c0f510b1b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18777,32 +18777,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { -Intrinsic::ID IID; -llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - IID = Intrinsic::amdgcn_global_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - IID = Intrinsic::amdgcn_global_atomic_fmax; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmax; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = -CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case 
AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19186,7 +19160,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19213,8 +19191,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: BinOp = llvm::AtomicRMWInst::FMin; break; +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: BinOp = llvm::AtomicRMWInst::FMax; break; diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index 9381ce951df3e..556e553903d1a 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) { } // CHECK-LABEL: test_global_global_min_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: 
test_global_global_min_f64$local // GFX90A: global_atomic_min_f64 void test_global_global_min_f64(__global double *addr, double x){ @@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double x){ } // CHECK-LABEL: test_global_max_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_global_max_f64$local // GFX90A: global_atomic_max_f64 void test_global_max_f64(__global double *addr, double x){ @@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr,
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins (PR #96875)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96875 >From 8ac629544dcf9fa4c35310abb89491b77e3292ba Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:34:43 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 26 ++- .../builtins-fp-atomics-gfx12.cl | 24 - .../builtins-fp-atomics-gfx90a.cl | 6 ++--- .../builtins-fp-atomics-gfx940.cl | 14 +++--- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c19a80921beaf..3afe3135d99d6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18803,22 +18803,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); return Builder.CreateCall(F, {Addr, Val}); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { -Intrinsic::ID IID; -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19200,7 +19184,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case 
AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19222,6 +19208,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19266,7 +19254,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. - if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) { llvm::Type *V2BF16Ty = FixedVectorType::get( llvm::Type::getBFloatTy(Builder.getContext()), 2); Val = Builder.CreateBitCast(Val, V2BF16Ty); diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 07e63a8711c7f..e8b6eb57c38d7 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2; // CHECK-LABEL: test_local_add_2bf16 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat> -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 +// CHECK-NEXT: 
[[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> // GFX12-LABEL: test_local_add_2bf16 @@ -48,7 +48,7 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // GFX12-LABEL:
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96874 >From 2c443d8a9daeb42234e585d0d9547634409952a9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:15:26 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 17 ++--- .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl | 6 -- .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ef4bd9fb4af09..c19a80921beaf 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18779,10 +18779,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { @@ -18792,19 +18790,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: IID = Intrinsic::amdgcn_global_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: IID = Intrinsic::amdgcn_flat_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = 
EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19207,7 +19198,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19227,6 +19220,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index cd10777dbe079..02e289427238f 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){ } // CHECK-LABEL: test_flat_add_local_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8{{$}} + // GFX90A-LABEL: test_flat_add_local_f64$local // GFX90A: ds_add_rtn_f64 void test_flat_add_local_f64(__local double *addr, double x){ @@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){ } // CHECK-LABEL: 
test_flat_global_add_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_flat_global_add_f64$local // GFX90A: global_atomic_add_f64 void test_flat_global_add_f64(__global double *addr, double x){ diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl index 589dcd406630d..bd9b8c7268e06 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl @@ -10,7 +10,8 @@ typedef half
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96873 >From 367f6897698f22c30cb7491d90ae0251bfa57af1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:12:59 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 20 ++- .../builtins-fp-atomics-gfx12.cl | 9 ++--- .../builtins-fp-atomics-gfx90a.cl | 2 +- .../builtins-fp-atomics-gfx940.cl | 3 ++- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ad4cce77221a6..ef4bd9fb4af09 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18777,22 +18777,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -18812,11 +18805,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgTy = llvm::Type::getFloatTy(getLLVMContext()); 
IID = Intrinsic::amdgcn_flat_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19217,7 +19205,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_fminf: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19235,6 +19225,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 6b8a6d14575db..07e63a8711c7f 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -48,7 +48,8 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_flat_add_2f16 // GFX12: flat_atomic_pk_add_f16 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) { @@ -64,7 +65,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) { } // CHECK-LABEL: test_global_add_half2 -// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_global_add_half2 // GFX12: global_atomic_pk_add_f16 v2, v[0:1], v2,
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (PR #96872)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96872 >From ea17c792053e32e39a7261e3bdf1673d98e4d94a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jun 2024 10:58:44 +0200 Subject: [PATCH 1/2] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} Need to emit syncscope and new metadata to get the native instruction, most of the time. --- clang/lib/CodeGen/CGBuiltin.cpp | 39 +-- .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl| 2 +- .../builtins-fp-atomics-gfx12.cl | 4 +- .../builtins-fp-atomics-gfx90a.cl | 4 +- .../builtins-fp-atomics-gfx940.cl | 4 +- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0c2ee446aa303..02f85f340893d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" @@ -18776,8 +18777,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: @@ -18789,18 +18788,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = 
Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: ArgTy = llvm::FixedVectorType::get( llvm::Type::getHalfTy(getLLVMContext()), 2); IID = Intrinsic::amdgcn_global_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -19223,7 +19215,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_ds_faddf: case AMDGPU::BI__builtin_amdgcn_ds_fminf: - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19239,6 +19233,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19273,8 +19269,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), AO, SSID); } else { - // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. - SSID = llvm::SyncScope::System; + // Most of the builtins do not have syncscope/order arguments. For DS + // atomics the scope doesn't really matter, as they implicitly operate at + // workgroup scope. 
+ // + // The global/flat cases need to use agent scope to consistently produce + // the native instruction instead of a cmpxchg expansion. + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); AO = AtomicOrdering::SequentiallyConsistent; // The v2bf16 builtin uses i16 instead of a natural bfloat type. @@ -19289,6 +19290,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); if (Volatile) RMW->setVolatile(true); + +unsigned AddrSpace = Ptr.getType()->getAddressSpace(); +if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) { + // Most targets require "amdgpu.no.fine.grained.memory" to emit the
[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)
@@ -75,6 +75,11 @@ Changes to the AArch64 Backend Changes to the AMDGPU Backend - +* Removed ``llvm.amdgcn.flat.atomic.fadd`` and + ``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the + :ref:`atomicrmw <i_atomicrmw>` instruction with `fadd` and arsenm wrote: This refers to i_atomicrmw? The documentation bot passes. https://github.com/llvm/llvm-project/pull/97051 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)
@@ -1017,29 +1015,6 @@ main_body: ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, double %data) { arsenm wrote: Depends if they are redundant or not. Some cases already tested atomicrmw, and had the intrinsic alongside it. We still have a lot of redundancy spread across multiple files https://github.com/llvm/llvm-project/pull/97051 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)
@@ -322,4 +322,36 @@ define <2 x i16> @upgrade_amdgcn_global_atomic_fadd_v2bf16_p1(ptr addrspace(1) % ret <2 x i16> %result } +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr nocapture, <2 x half>) #0 arsenm wrote: Yes, but also no. These tests should use llvm-as/llvm-dis instead of opt, and the update scripts don't understand that https://github.com/llvm/llvm-project/pull/97051 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96874 >From c8a9e8de2d0faf678ab8d67c85c4efd8312d5d10 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:15:26 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 17 ++--- .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl | 6 -- .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ef4bd9fb4af09..c19a80921beaf 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18779,10 +18779,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { @@ -18792,19 +18790,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: IID = Intrinsic::amdgcn_global_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: IID = Intrinsic::amdgcn_flat_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = 
EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19207,7 +19198,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19227,6 +19220,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index cd10777dbe079..02e289427238f 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){ } // CHECK-LABEL: test_flat_add_local_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8{{$}} + // GFX90A-LABEL: test_flat_add_local_f64$local // GFX90A: ds_add_rtn_f64 void test_flat_add_local_f64(__local double *addr, double x){ @@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){ } // CHECK-LABEL: 
test_flat_global_add_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_flat_global_add_f64$local // GFX90A: global_atomic_add_f64 void test_flat_global_add_f64(__global double *addr, double x){ diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl index 589dcd406630d..bd9b8c7268e06 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl @@ -10,7 +10,8 @@ typedef half
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96873 >From 7305c0477711f7b26e4ebad3cca0afa33e1defa9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:12:59 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 20 ++- .../builtins-fp-atomics-gfx12.cl | 9 ++--- .../builtins-fp-atomics-gfx90a.cl | 2 +- .../builtins-fp-atomics-gfx940.cl | 3 ++- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ad4cce77221a6..ef4bd9fb4af09 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18777,22 +18777,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -18812,11 +18805,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgTy = llvm::Type::getFloatTy(getLLVMContext()); 
IID = Intrinsic::amdgcn_flat_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19217,7 +19205,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_fminf: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19235,6 +19225,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 6b8a6d14575db..07e63a8711c7f 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -48,7 +48,8 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_flat_add_2f16 // GFX12: flat_atomic_pk_add_f16 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) { @@ -64,7 +65,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) { } // CHECK-LABEL: test_global_add_half2 -// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_global_add_half2 // GFX12: global_atomic_pk_add_f16 v2, v[0:1], v2,
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (PR #96872)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96872 >From 2e27b153cf40498f64ef9f13b69e80804c45a6a4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jun 2024 10:58:44 +0200 Subject: [PATCH 1/2] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} Need to emit syncscope and new metadata to get the native instruction, most of the time. --- clang/lib/CodeGen/CGBuiltin.cpp | 39 +-- .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl| 2 +- .../builtins-fp-atomics-gfx12.cl | 4 +- .../builtins-fp-atomics-gfx90a.cl | 4 +- .../builtins-fp-atomics-gfx940.cl | 4 +- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0c2ee446aa303..02f85f340893d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" @@ -18776,8 +18777,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: @@ -18789,18 +18788,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = 
Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: ArgTy = llvm::FixedVectorType::get( llvm::Type::getHalfTy(getLLVMContext()), 2); IID = Intrinsic::amdgcn_global_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -19223,7 +19215,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_ds_faddf: case AMDGPU::BI__builtin_amdgcn_ds_fminf: - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19239,6 +19233,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19273,8 +19269,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), AO, SSID); } else { - // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. - SSID = llvm::SyncScope::System; + // Most of the builtins do not have syncscope/order arguments. For DS + // atomics the scope doesn't really matter, as they implicitly operate at + // workgroup scope. 
+ // + // The global/flat cases need to use agent scope to consistently produce + // the native instruction instead of a cmpxchg expansion. + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); AO = AtomicOrdering::SequentiallyConsistent; // The v2bf16 builtin uses i16 instead of a natural bfloat type. @@ -19289,6 +19290,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); if (Volatile) RMW->setVolatile(true); + +unsigned AddrSpace = Ptr.getType()->getAddressSpace(); +if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) { + // Most targets require "amdgpu.no.fine.grained.memory" to emit the
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)
arsenm wrote: ping https://github.com/llvm/llvm-project/pull/96874 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)
arsenm wrote: ping https://github.com/llvm/llvm-project/pull/96873 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (PR #96872)
arsenm wrote: ping https://github.com/llvm/llvm-project/pull/96872 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
arsenm wrote: ping https://github.com/llvm/llvm-project/pull/96760 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Enable vectorization of v2f16 copysign (PR #100799)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100799 >From ba0f8f03dc491562050a65456f7ebda23a7e4210 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 22:36:33 +0400 Subject: [PATCH] AMDGPU: Enable vectorization of v2f16 copysign --- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 3 + .../Analysis/CostModel/AMDGPU/copysign.ll | 256 +- .../SLPVectorizer/AMDGPU/slp-v2f16.ll | 16 +- 3 files changed, 139 insertions(+), 136 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index d09f4fb2f659b..9e89898b11bcb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -688,6 +688,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { switch (ID) { case Intrinsic::fma: case Intrinsic::fmuladd: + case Intrinsic::copysign: // There's a small benefit to using vector ops in the legalized code. 
case Intrinsic::round: case Intrinsic::uadd_sat: @@ -739,6 +740,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes , : getQuarterRateInstrCost(CostKind); } break; + case Intrinsic::copysign: +return NElts * getFullRateInstrCost(); case Intrinsic::uadd_sat: case Intrinsic::usub_sat: case Intrinsic::sadd_sat: diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll index 3b7b1b4238b8a..06a058ff2e7b1 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll @@ -12,90 +12,90 @@ define void @copysign_f16() { ; BASE-LABEL: 'copysign_f16' ; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef) -; BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef) -; BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef) -; BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef) -; BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef) -; BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef) -; BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef) -; BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef) +; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> 
@llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef) +; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef) +; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef) +; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef) +; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef) +; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef) +; BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef) ; BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX8-LABEL: 'copysign_f16' ; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From 6a7346484924acdfbd630096e3dbbb4b14474028 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- .../Analysis/CostModel/AMDGPU/arith-ssat.ll | 32 +- .../Analysis/CostModel/AMDGPU/arith-usat.ll | 32 +- 5 files changed, 242 insertions(+), 231 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0b1ecc002ae25..8ae236850b982 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes , if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -737,10 +738,16 @@ 
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes , case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [](MVT M) { return M == LT.second; })) NElts = 1; break; + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; } return LT.first * NElts * InstRate; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..b86e99558377b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64
[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100521 >From 19f7331a579837b2657a5d0741c6633d6f8296da Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:33:23 +0400 Subject: [PATCH] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 56 +-- llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll | 116 ++--- llvm/test/Analysis/CostModel/X86/fptoi_sat.ll | 400 +- .../AggressiveInstCombine/ARM/fptosisat.ll| 49 ++- 4 files changed, 324 insertions(+), 297 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 1a089a3fa9634..ba70498bfb731 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2179,31 +2179,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::UMULO; break; case Intrinsic::fptosi_sat: -case Intrinsic::fptoui_sat: { - if (Tys.empty()) -break; - Type *FromTy = Tys[0]; - bool IsSigned = IID == Intrinsic::fptosi_sat; - - InstructionCost Cost = 0; - IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy, - {FromTy, FromTy}); - Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind); - IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy, - {FromTy, FromTy}); - Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind); - Cost += thisT()->getCastInstrCost( - IsSigned ? 
Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy, - TTI::CastContextHint::None, CostKind); - if (IsSigned) { -Type *CondTy = RetTy->getWithNewBitWidth(1); -Cost += thisT()->getCmpSelInstrCost( -BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind); -Cost += thisT()->getCmpSelInstrCost( -BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind); - } - return Cost; -} + ISD = ISD::FP_TO_SINT_SAT; + break; +case Intrinsic::fptoui_sat: + ISD = ISD::FP_TO_UINT_SAT; + break; case Intrinsic::ctpop: ISD = ISD::CTPOP; // In case of legalization use TCC_Expensive. This is cheaper than a @@ -2418,6 +2398,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::fptosi_sat: +case Intrinsic::fptoui_sat: { + if (Tys.empty()) +break; + Type *FromTy = Tys[0]; + bool IsSigned = IID == Intrinsic::fptosi_sat; + + InstructionCost Cost = 0; + IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy, + {FromTy, FromTy}); + Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind); + IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy, + {FromTy, FromTy}); + Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind); + Cost += thisT()->getCastInstrCost( + IsSigned ? 
Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy, + TTI::CastContextHint::None, CostKind); + if (IsSigned) { +Type *CondTy = RetTy->getWithNewBitWidth(1); +Cost += thisT()->getCmpSelInstrCost( +BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind); +Cost += thisT()->getCmpSelInstrCost( +BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind); + } + return Cost; +} default: break; } diff --git a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll index aff7b19a9c87a..29c86fc778a98 100644 --- a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll @@ -4,26 +4,26 @@ define void @casts() { ; CHECK-MVE-LABEL: 'casts' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s32 = call i32
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100519 >From 411c9c8f9fff386807a4ff6317dbec8a3eb1cd1a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 ++- .../Analysis/CostModel/X86/arith-overflow.ll | 8 +-- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index 963bb8a9d9fac..71bc6b5375c73 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1080,7 +1080,7 @@ define i32 @smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>,
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100520 >From fc18583308ccaaf60bd234af160888a669648fef Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 
CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Correct costs of saturating add/sub intrinsics (PR #100808)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100808 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Correct costs of saturating add/sub intrinsics (PR #100808)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100808?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100808** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100808?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100523** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463**
<a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100522** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a> Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100808 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From b448d7ddbf60e4678daf2d8ec522a82ceca7d7a3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- .../Analysis/CostModel/AMDGPU/arith-ssat.ll | 32 +- .../Analysis/CostModel/AMDGPU/arith-usat.ll | 32 +- 5 files changed, 242 insertions(+), 231 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0b1ecc002ae25..8ae236850b982 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes , if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -737,10 +738,16 @@ 
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes , case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [](MVT M) { return M == LT.second; })) NElts = 1; break; + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; } return LT.first * NElts * InstRate; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..b86e99558377b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100520 >From c382d2f8f2e2d0660bd3f1db5007e2a5f3cfa3cc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 
CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100519 >From f154bdbc4048a943d23480ca00b894f0853bdf73 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 ++- .../Analysis/CostModel/X86/arith-overflow.ll | 8 +-- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index 963bb8a9d9fac..71bc6b5375c73 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1080,7 +1080,7 @@ define i32 @smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>,
[llvm-branch-commits] [llvm] DAG: Lower fcNormal is.fpclass to compare with inf (PR #100389)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100389 >From f515257afc80ac1874ffb0e3d2697b2447a1bf5f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 1 Feb 2023 09:06:59 -0400 Subject: [PATCH] DAG: Lower fcNormal is.fpclass to compare with inf Looks worse for x86 without the fabs check. Not sure if this is useful for any targets. --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 25 +++ 1 file changed, 25 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 18cd368e24259..dcc65549d7a0e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8673,6 +8673,31 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, IsOrdered ? OrderedOp : UnorderedOp); } } + +if (FPTestMask == fcNormal) { + // TODO: Handle unordered + ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT; + ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE; + + if (isCondCodeLegalOrCustom(IsFiniteOp, + OperandVT.getScalarType().getSimpleVT()) && + isCondCodeLegalOrCustom(IsNormalOp, + OperandVT.getScalarType().getSimpleVT()) && + isFAbsFree(OperandVT)) { +// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal) +SDValue Inf = +DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT); +SDValue SmallestNormal = DAG.getConstantFP( +APFloat::getSmallestNormalized(Semantics), DL, OperandVT); + +SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op); +SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp); +SDValue IsNormal = +DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp); +unsigned LogicOp = IsInvertedFP ? 
ISD::OR : ISD::AND; +return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal); + } +} } // Some checks may be represented as inversion of simpler check, for example ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100380 >From 6226f310c474650b267a41d2509df5d0396ac481 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 1 Feb 2023 09:52:34 -0400 Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp InstCombine also should have taken care of this, but this should be helpful when the fcmp based lowering strategy tries to combine multiple tests. --- llvm/lib/CodeGen/CodeGenCommonISel.cpp| 2 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 25 +++- llvm/test/CodeGen/AArch64/isinf.ll| 22 ++- llvm/test/CodeGen/X86/is_fpclass-fp80.ll | 52 +++ llvm/test/CodeGen/X86/is_fpclass.ll | 137 +- 5 files changed, 127 insertions(+), 111 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp index 88c643c568027..942cf442e9098 100644 --- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) { case fcSubnormal | fcZero | fcNan: return InvertedTest; case fcInf | fcNan: + case fcPosInf | fcNan: + case fcNegInf | fcNan: // If we're trying to use fcmp, we can take advantage of the nan check // behavior of the compare (but this is more instructions in the integer // expansion). diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1e12d7937ba79..18cd368e24259 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8628,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, return DAG.getSetCC(DL, ResultVT, Op, Op, IsInvertedFP ? ISD::SETO : ISD::SETUO); -bool IsOrderedInf = FPTestMask == fcInf; -if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) && -isCondCodeLegalOrCustom(IsOrderedInf ? 
OrderedCmpOpcode - : UnorderedCmpOpcode, -OperandVT.getScalarType().getSimpleVT()) && -isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) { +if (OrderedFPTestMask == fcInf && +isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode + : UnorderedCmpOpcode, +OperandVT.getScalarType().getSimpleVT())) { // isinf(x) --> fabs(x) == inf SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op); SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT); return DAG.getSetCC(DL, ResultVT, Abs, Inf, - IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode); + IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode); +} + +if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) { + // isposinf(x) --> x == inf + // isneginf(x) --> x == -inf + // isposinf(x) || nan --> x u== inf + // isneginf(x) || nan --> x u== -inf + + SDValue Inf = DAG.getConstantFP( + APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL, + OperandVT); + return DAG.getSetCC(DL, ResultVT, Op, Inf, + IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode); } if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) { diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll index 834417b98743a..458bd7eeba16c 100644 --- a/llvm/test/CodeGen/AArch64/isinf.ll +++ b/llvm/test/CodeGen/AArch64/isinf.ll @@ -58,14 +58,22 @@ define i32 @replace_isinf_call_f64(double %x) { define i32 @replace_isinf_call_f128(fp128 %x) { ; CHECK-LABEL: replace_isinf_call_f128: ; CHECK: // %bb.0: -; CHECK-NEXT:str q0, [sp, #-16]! 
-; CHECK-NEXT:.cfi_def_cfa_offset 16 -; CHECK-NEXT:ldp x9, x8, [sp], #16 -; CHECK-NEXT:and x8, x8, #0x7fff -; CHECK-NEXT:eor x8, x8, #0x7fff -; CHECK-NEXT:orr x8, x9, x8 -; CHECK-NEXT:cmp x8, #0 +; CHECK-NEXT:sub sp, sp, #32 +; CHECK-NEXT:str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT:.cfi_def_cfa_offset 32 +; CHECK-NEXT:.cfi_offset w30, -16 +; CHECK-NEXT:str q0, [sp] +; CHECK-NEXT:ldrb w8, [sp, #15] +; CHECK-NEXT:and w8, w8, #0x7f +; CHECK-NEXT:strb w8, [sp, #15] +; CHECK-NEXT:adrp x8, .LCPI3_0 +; CHECK-NEXT:ldr q0, [sp] +; CHECK-NEXT:ldr q1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT:bl __eqtf2 +; CHECK-NEXT:cmp w0, #0 +; CHECK-NEXT:ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT:cset w0, eq +; CHECK-NEXT:add sp, sp, #32 ; CHECK-NEXT:ret %abs = tail call fp128 @llvm.fabs.f128(fp128 %x) %cmpinf = fcmp oeq fp128 %abs, 0xL7FFF diff --git
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Select all constants in tablegen (PR #100788)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100788 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Cleanup immediate selection patterns (PR #100787)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100787 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Select all constants in tablegen (PR #100788)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100788?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100788** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100788?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100787** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100787?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100786** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100786?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a> Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100788 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Cleanup immediate selection patterns (PR #100787)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100787?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100788** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100788?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100787** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100787?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100786** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100786?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a> Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100787 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Select all constants in tablegen (PR #100788)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100788 This regresses the arbitrary address space pointer case. Ideally we could write a pattern that matches a pointer based only on its size, but using iPTR/iPTRAny seem to not work for this. >From e75e929777d8ffc856427fdf70df10a94650cd26 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 26 Jul 2024 21:32:22 +0400 Subject: [PATCH] AMDGPU/GlobalISel: Select all constants in tablegen This regresses the arbitrary address space pointer case. Ideally we could write a pattern that matches a pointer based only on its size, but using iPTR/iPTRAny seem to not work for this. --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 97 +- .../Target/AMDGPU/AMDGPUInstructionSelector.h | 1 - llvm/lib/Target/AMDGPU/SIInstructions.td | 44 --- .../GlobalISel/inst-select-constant.mir | 120 ++ 4 files changed, 62 insertions(+), 200 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 9a73629b0f0cd..73f3921b2ff4c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2503,98 +2503,6 @@ bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr ) const { return false; } -bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr ) const { - if (selectImpl(I, *CoverageInfo)) -return true; - - // FIXME: Relying on manual selection for 64-bit case, and pointer typed - // constants. - MachineBasicBlock *BB = I.getParent(); - MachineOperand = I.getOperand(1); - Register DstReg = I.getOperand(0).getReg(); - LLT Ty = MRI->getType(DstReg); - unsigned Size = Ty.getSizeInBits(); - assert((Size == 64 || Ty.isPointer()) && - "patterns should have selected this"); - - bool IsFP = false; - - // The AMDGPU backend only supports Imm operands and not CImm or FPImm. 
- if (ImmOp.isFPImm()) { -const APInt = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt(); -ImmOp.ChangeToImmediate(Imm.getZExtValue()); -IsFP = true; - } else if (ImmOp.isCImm()) { -ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue()); - } else { -llvm_unreachable("Not supported by g_constants"); - } - - const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); - const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID; - - unsigned Opcode; - if (DstRB->getID() == AMDGPU::VCCRegBankID) { -Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - } else if (Size == 64 && - AMDGPU::isValid32BitLiteral(I.getOperand(1).getImm(), IsFP)) { -Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO; -I.setDesc(TII.get(Opcode)); -I.addImplicitDefUseOperands(*MF); -return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } else { -Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; - -// We should never produce s1 values on banks other than VCC. If the user of -// this already constrained the register, we may incorrectly think it's VCC -// if it wasn't originally. -if (Size == 1) - return false; - } - - if (Size != 64) { -I.setDesc(TII.get(Opcode)); -I.addImplicitDefUseOperands(*MF); -return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } - - const DebugLoc = I.getDebugLoc(); - - APInt Imm(Size, I.getOperand(1).getImm()); - - MachineInstr *ResInst; - if (IsSgpr && TII.isInlineConstant(Imm)) { -ResInst = BuildMI(*BB, , DL, TII.get(AMDGPU::S_MOV_B64), DstReg) - .addImm(I.getOperand(1).getImm()); - } else { -const TargetRegisterClass *RC = IsSgpr ? 
- ::SReg_32RegClass : ::VGPR_32RegClass; -Register LoReg = MRI->createVirtualRegister(RC); -Register HiReg = MRI->createVirtualRegister(RC); - -BuildMI(*BB, , DL, TII.get(Opcode), LoReg) - .addImm(Imm.trunc(32).getZExtValue()); - -BuildMI(*BB, , DL, TII.get(Opcode), HiReg) - .addImm(Imm.ashr(32).getZExtValue()); - -ResInst = BuildMI(*BB, , DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) - .addReg(LoReg) - .addImm(AMDGPU::sub0) - .addReg(HiReg) - .addImm(AMDGPU::sub1); - } - - // We can't call constrainSelectedInstRegOperands here, because it doesn't - // work for target independent opcodes - I.eraseFromParent(); - const TargetRegisterClass *DstRC = -TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI); - if (!DstRC) -return true; - return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); -} - bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr ) const { // Only manually handle the f64 SGPR case. // @@ -3521,9 +3429,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr ) { case TargetOpcode::G_PTRTOINT: case TargetOpcode::G_FREEZE: return selectCOPY(I); - case TargetOpcode::G_CONSTANT: - case TargetOpcode::G_FCONSTANT: -return
[llvm-branch-commits] [llvm] AMDGPU: Cleanup immediate selection patterns (PR #100787)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100787 Reorder for consistency, so the same types for v/s are together. >From 794f20ecd9df0024481842bce8dd9e7d9e3684cb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 26 Jul 2024 17:08:26 +0400 Subject: [PATCH] AMDGPU: Cleanup immediate selection patterns Reorder for consistency, so the same types for v/s are together. --- llvm/lib/Target/AMDGPU/SIInstructions.td | 79 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index d2101654d2acb..bcf778b31d276 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2138,19 +2138,26 @@ def : GCNPat < /** Immediate Patterns **/ /** == **/ +// FIXME: Remove VGPRImm. Should be inferrable from register bank. + def : GCNPat < (VGPRImm<(i32 imm)>:$imm), (V_MOV_B32_e32 imm:$imm) >; def : GCNPat < - (VGPRImm<(f32 fpimm)>:$imm), - (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm))) + (i32 imm:$imm), + (S_MOV_B32 imm:$imm) >; def : GCNPat < - (i32 imm:$imm), - (S_MOV_B32 imm:$imm) + (p5 frameindex:$fi), + (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi))) +>; + +def : GCNPat < + (p5 frameindex:$fi), + (S_MOV_B32 (p5 (frameindex_to_targetframeindex $fi))) >; def : GCNPat < @@ -2168,40 +2175,34 @@ def : GCNPat < (V_MOV_B32_e32 imm:$imm) >; -// FIXME: Workaround for ordering issue with peephole optimizer where -// a register class copy interferes with immediate folding. 
Should -// use s_mov_b32, which can be shrunk to s_movk_i32 def : GCNPat < - (VGPRImm<(f16 fpimm)>:$imm), - (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm))) + (i16 imm:$imm), + (S_MOV_B32 imm:$imm) >; def : GCNPat < - (VGPRImm<(bf16 fpimm)>:$imm), - (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm))) + (VGPRImm<(f16 fpimm)>:$imm), + (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm))) >; -// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit -// immediate and wil be expanded as needed, but we will only use these patterns -// for values which can be encoded. def : GCNPat < - (VGPRImm<(i64 imm)>:$imm), - (V_MOV_B64_PSEUDO imm:$imm) + (f16 fpimm:$imm), + (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) >; def : GCNPat < - (VGPRImm<(f64 fpimm)>:$imm), - (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm))) + (VGPRImm<(bf16 fpimm)>:$imm), + (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm))) >; def : GCNPat < - (i64 imm:$imm), - (S_MOV_B64_IMM_PSEUDO imm:$imm) + (bf16 fpimm:$imm), + (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) >; def : GCNPat < - (f64 fpimm:$imm), - (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm))) + (VGPRImm<(f32 fpimm)>:$imm), + (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm))) >; def : GCNPat < @@ -2210,31 +2211,38 @@ def : GCNPat < >; def : GCNPat < - (f16 fpimm:$imm), - (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) + (VGPRImm<(i64 imm)>:$imm), + (V_MOV_B64_PSEUDO imm:$imm) >; def : GCNPat < - (bf16 fpimm:$imm), - (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) + (i64 InlineImm64:$imm), + (S_MOV_B64 InlineImm64:$imm) >; def : GCNPat < - (p5 frameindex:$fi), - (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi))) + (i64 imm:$imm), + (S_MOV_B64_IMM_PSEUDO imm:$imm) >; def : GCNPat < - (p5 frameindex:$fi), - (S_MOV_B32 (p5 (frameindex_to_targetframeindex $fi))) + (VGPRImm<(f64 fpimm)>:$imm), + (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm))) >; +// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit +// 
immediate and wil be expanded as needed, but we will only use these patterns +// for values which can be encoded. def : GCNPat < - (i64 InlineImm64:$imm), - (S_MOV_B64 InlineImm64:$imm) + (f64 InlineImmFP64:$imm), + (S_MOV_B64 (i64 (bitcast_fpimm_to_i64 $imm))) +>; + +def : GCNPat < + (f64 fpimm:$imm), + (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm))) >; -// Set to sign-extended 64-bit value (true = -1, false = 0) // Set to sign-extended 64-bit value (true = -1, false = 0) def : GCNPat <(i1 imm:$imm), (S_MOV_B64 imm:$imm)> { @@ -2246,11 +2254,6 @@ def : GCNPat <(i1 imm:$imm), let WaveSizePredicate = isWave32; } -def : GCNPat < - (f64 InlineImmFP64:$imm), - (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineImmFP64:$imm))) ->; - /** == **/ /** Intrinsic Patterns **/ /** == **/ ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Lower fcNormal is.fpclass to compare with inf (PR #100389)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100389 >From fcfbc51749e1a8289d88eeea504cdf2af94c6cf0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 1 Feb 2023 09:06:59 -0400 Subject: [PATCH] DAG: Lower fcNormal is.fpclass to compare with inf Looks worse for x86 without the fabs check. Not sure if this is useful for any targets. --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 25 +++ 1 file changed, 25 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 18cd368e24259..dcc65549d7a0e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8673,6 +8673,31 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, IsOrdered ? OrderedOp : UnorderedOp); } } + +if (FPTestMask == fcNormal) { + // TODO: Handle unordered + ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT; + ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE; + + if (isCondCodeLegalOrCustom(IsFiniteOp, + OperandVT.getScalarType().getSimpleVT()) && + isCondCodeLegalOrCustom(IsNormalOp, + OperandVT.getScalarType().getSimpleVT()) && + isFAbsFree(OperandVT)) { +// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal) +SDValue Inf = +DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT); +SDValue SmallestNormal = DAG.getConstantFP( +APFloat::getSmallestNormalized(Semantics), DL, OperandVT); + +SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op); +SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp); +SDValue IsNormal = +DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp); +unsigned LogicOp = IsInvertedFP ? 
ISD::OR : ISD::AND; +return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal); + } +} } // Some checks may be represented as inversion of simpler check, for example ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100380 >From fc46244e25e7dc86354a6fb42316788eab883198 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 1 Feb 2023 09:52:34 -0400 Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp InstCombine also should have taken care of this, but this should be helpful when the fcmp based lowering strategy tries to combine multiple tests. --- llvm/lib/CodeGen/CodeGenCommonISel.cpp| 2 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 25 +++- llvm/test/CodeGen/AArch64/isinf.ll| 22 ++- llvm/test/CodeGen/X86/is_fpclass-fp80.ll | 52 +++ llvm/test/CodeGen/X86/is_fpclass.ll | 137 +- 5 files changed, 127 insertions(+), 111 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp index 88c643c568027..942cf442e9098 100644 --- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) { case fcSubnormal | fcZero | fcNan: return InvertedTest; case fcInf | fcNan: + case fcPosInf | fcNan: + case fcNegInf | fcNan: // If we're trying to use fcmp, we can take advantage of the nan check // behavior of the compare (but this is more instructions in the integer // expansion). diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1e12d7937ba79..18cd368e24259 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8628,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, return DAG.getSetCC(DL, ResultVT, Op, Op, IsInvertedFP ? ISD::SETO : ISD::SETUO); -bool IsOrderedInf = FPTestMask == fcInf; -if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) && -isCondCodeLegalOrCustom(IsOrderedInf ? 
OrderedCmpOpcode - : UnorderedCmpOpcode, -OperandVT.getScalarType().getSimpleVT()) && -isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) { +if (OrderedFPTestMask == fcInf && +isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode + : UnorderedCmpOpcode, +OperandVT.getScalarType().getSimpleVT())) { // isinf(x) --> fabs(x) == inf SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op); SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT); return DAG.getSetCC(DL, ResultVT, Abs, Inf, - IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode); + IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode); +} + +if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) { + // isposinf(x) --> x == inf + // isneginf(x) --> x == -inf + // isposinf(x) || nan --> x u== inf + // isneginf(x) || nan --> x u== -inf + + SDValue Inf = DAG.getConstantFP( + APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL, + OperandVT); + return DAG.getSetCC(DL, ResultVT, Op, Inf, + IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode); } if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) { diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll index 834417b98743a..458bd7eeba16c 100644 --- a/llvm/test/CodeGen/AArch64/isinf.ll +++ b/llvm/test/CodeGen/AArch64/isinf.ll @@ -58,14 +58,22 @@ define i32 @replace_isinf_call_f64(double %x) { define i32 @replace_isinf_call_f128(fp128 %x) { ; CHECK-LABEL: replace_isinf_call_f128: ; CHECK: // %bb.0: -; CHECK-NEXT:str q0, [sp, #-16]! 
-; CHECK-NEXT:.cfi_def_cfa_offset 16 -; CHECK-NEXT:ldp x9, x8, [sp], #16 -; CHECK-NEXT:and x8, x8, #0x7fff -; CHECK-NEXT:eor x8, x8, #0x7fff -; CHECK-NEXT:orr x8, x9, x8 -; CHECK-NEXT:cmp x8, #0 +; CHECK-NEXT:sub sp, sp, #32 +; CHECK-NEXT:str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT:.cfi_def_cfa_offset 32 +; CHECK-NEXT:.cfi_offset w30, -16 +; CHECK-NEXT:str q0, [sp] +; CHECK-NEXT:ldrb w8, [sp, #15] +; CHECK-NEXT:and w8, w8, #0x7f +; CHECK-NEXT:strb w8, [sp, #15] +; CHECK-NEXT:adrp x8, .LCPI3_0 +; CHECK-NEXT:ldr q0, [sp] +; CHECK-NEXT:ldr q1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT:bl __eqtf2 +; CHECK-NEXT:cmp w0, #0 +; CHECK-NEXT:ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT:cset w0, eq +; CHECK-NEXT:add sp, sp, #32 ; CHECK-NEXT:ret %abs = tail call fp128 @llvm.fabs.f128(fp128 %x) %cmpinf = fcmp oeq fp128 %abs, 0xL7FFF diff --git
[llvm-branch-commits] [llvm] DAG: Handle lowering unordered compare with inf (PR #100378)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100378 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Remove MachineModuleInfo reference from MachineFunction (PR #100357)
arsenm wrote: ### Merge activity * **Jul 26, 4:56 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100357). https://github.com/llvm/llvm-project/pull/100357 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)
arsenm wrote: ### Merge activity * **Jul 26, 4:56 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100333). https://github.com/llvm/llvm-project/pull/100333 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Remove MachineModuleInfo reference from MachineFunction (PR #100357)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100357 >From 88bb03cf2b3587d08ee5b73fbacb7b6c3bec1b40 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 20 Jul 2024 14:24:23 +0400 Subject: [PATCH 1/3] CodeGen: Remove MachineModuleInfo reference from MachineFunction This avoids another unserializable field. Move the DbgInfoAvailable field into the AsmPrinter, which is only really a cache/convenience bit for checking a direct IR module metadata check. --- llvm/include/llvm/CodeGen/AsmPrinter.h | 6 ++ llvm/include/llvm/CodeGen/MachineFunction.h| 18 -- llvm/include/llvm/CodeGen/MachineModuleInfo.h | 6 -- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 17 - llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 4 ++-- .../CodeGen/AsmPrinter/DebugHandlerBase.cpp| 4 ++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- llvm/lib/CodeGen/MachineFunction.cpp | 12 ++-- llvm/lib/CodeGen/MachineFunctionAnalysis.cpp | 2 +- llvm/lib/CodeGen/MachineModuleInfo.cpp | 5 + llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 4 +--- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 12 ++-- llvm/tools/llvm-reduce/ReducerWorkItem.cpp | 2 +- .../CodeGen/AArch64SelectionDAGTest.cpp| 4 ++-- llvm/unittests/CodeGen/InstrRefLDVTest.cpp | 2 +- llvm/unittests/CodeGen/MFCommon.inc| 3 ++- .../SelectionDAGAddressAnalysisTest.cpp| 2 +- .../CodeGen/SelectionDAGPatternMatchTest.cpp | 2 +- .../AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp | 3 ++- llvm/unittests/Target/AMDGPU/PALMetadata.cpp | 2 +- .../Target/RISCV/RISCVInstrInfoTest.cpp| 2 +- 21 files changed, 54 insertions(+), 60 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index f57be39076a783..36d1b479738704 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -225,6 +225,9 @@ class AsmPrinter : public MachineFunctionPass { /// split stack prologue. 
bool HasNoSplitStack = false; + /// True if debugging information is available in this module. + bool DbgInfoAvailable = false; + protected: explicit AsmPrinter(TargetMachine , std::unique_ptr Streamer); @@ -430,6 +433,9 @@ class AsmPrinter : public MachineFunctionPass { /// Get the CFISection type for the module. CFISection getModuleCFISectionType() const { return ModuleCFISection; } + /// Returns true if valid debug info is present. + bool hasDebugInfo() const { return DbgInfoAvailable; } + bool needsSEHMoves(); /// Since emitting CFI unwind information is entangled with supporting the diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 6e7292abeddbbd..142570b9ce551e 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -260,7 +260,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { const LLVMTargetMachine const TargetSubtargetInfo *STI; MCContext - MachineModuleInfo // RegInfo - Information about each register in use in the function. MachineRegisterInfo *RegInfo; @@ -395,15 +394,15 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { /// \} - /// Clear all the members of this MachineFunction, but the ones used - /// to initialize again the MachineFunction. - /// More specifically, this deallocates all the dynamically allocated - /// objects and get rid of all the XXXInfo data structure, but keep - /// unchanged the references to Fn, Target, MMI, and FunctionNumber. + /// Clear all the members of this MachineFunction, but the ones used to + /// initialize again the MachineFunction. More specifically, this deallocates + /// all the dynamically allocated objects and get rid of all the XXXInfo data + /// structure, but keep unchanged the references to Fn, Target, and + /// FunctionNumber. void clear(); /// Allocate and initialize the different members. /// In particular, the XXXInfo data structure. 
- /// \pre Fn, Target, MMI, and FunctionNumber are properly set. + /// \pre Fn, Target, and FunctionNumber are properly set. void init(); public: @@ -632,8 +631,8 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { const static unsigned int DebugOperandMemNumber; MachineFunction(Function , const LLVMTargetMachine , - const TargetSubtargetInfo , unsigned FunctionNum, - MachineModuleInfo ); + const TargetSubtargetInfo , MCContext , + unsigned FunctionNum); MachineFunction(const MachineFunction &) = delete; MachineFunction =(const MachineFunction &) = delete; ~MachineFunction(); @@ -665,7 +664,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { GISelChangeObserver *getObserver() const { return Observer; } - MachineModuleInfo () const {
[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100333 >From 9f6b09e1041ed88c95a7c51ac441769f4f82cfd6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 13:11:04 +0400 Subject: [PATCH] DebugInfo: Avoid some MMI::hasDebugInfo checks I assume getSubprogram will do the correct thing in hasDebugInfo, and this is redundant with the debug_compile_units distance check. This is in preparation for removing the field. --- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 6c70c47de8822..ed99eb3c459e5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { Ty->getTag() == dwarf::DW_TAG_unspecified_type; } -static bool hasDebugInfo(const MachineModuleInfo *MMI, - const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) -return false; +static bool hasDebugInfo(const MachineFunction *MF) { auto *SP = MF->getFunction().getSubprogram(); if (!SP) return false; @@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI, void DebugHandlerBase::beginFunction(const MachineFunction *MF) { PrevInstBB = nullptr; - if (!Asm || !hasDebugInfo(MMI, MF)) { + if (!Asm || !hasDebugInfo(MF)) { skippedNonDebugFunction(); return; } @@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() { } void DebugHandlerBase::endFunction(const MachineFunction *MF) { - if (Asm && hasDebugInfo(MMI, MF)) + if (Asm && hasDebugInfo(MF)) endFunctionImpl(MF); DbgValues.clear(); DbgLabels.clear(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5f1f315c5ab24..fbce7e92b7781 100644 --- 
a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1148,14 +1148,15 @@ sortGlobalExprs(SmallVectorImpl ) { void DwarfDebug::beginModule(Module *M) { DebugHandlerBase::beginModule(M); - if (!Asm || !MMI->hasDebugInfo()) + if (!Asm) return; unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), M->debug_compile_units_end()); + if (NumDebugCUs == 0) +return; + assert(NumDebugCUs > 0 && "Asm unexpectedly initialized"); - assert(MMI->hasDebugInfo() && - "DebugInfoAvailabilty unexpectedly not initialized"); SingleCU = NumDebugCUs == 1; DenseMap> GVMap; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0b654abd2814c..b4eba07afe7c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() { // Emit initial .loc debug directive for correct relocation symbol data. if (const DISubprogram *SP = MF->getFunction().getSubprogram()) { assert(SP->getUnit()); -if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo()) +if (!SP->getUnit()->isDebugDirectivesOnly()) emitInitialRawDwarfLocDirective(*MF); } } @@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module , raw_ostream , if (HasFullDebugInfo) break; } - if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo) + if (HasFullDebugInfo) O << ", debug"; O << "\n"; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Move current call site out of MachineModuleInfo (PR #100369)
arsenm wrote: ### Merge activity * **Jul 26, 3:21 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100369). https://github.com/llvm/llvm-project/pull/100369 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Remove MachineModuleInfo reference from MachineFunction (PR #100357)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100357 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100333 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Move current call site out of MachineModuleInfo (PR #100369)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100369 >From 3069e94a57f37b11c466b5cd1b71fde4f538a861 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 17:00:00 +0400 Subject: [PATCH 1/2] CodeGen: Move current call site out of MachineModuleInfo I do not understand what this is for, but it's only used in SelectionDAGBuilder, so move it to FunctionLoweringInfo like other function scope DAG builder state. The intrinsics are not documented in the LangRef or Intrinsics.td. This removes the last piece of codegen state from MachineModuleInfo. --- .../llvm/CodeGen/FunctionLoweringInfo.h | 17 + llvm/include/llvm/CodeGen/MachineModuleInfo.h | 24 --- llvm/lib/CodeGen/MachineModuleInfo.cpp| 2 -- .../SelectionDAG/SelectionDAGBuilder.cpp | 10 4 files changed, 21 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index 45a47d7333e35..fa75d883e451c 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -183,11 +183,28 @@ class FunctionLoweringInfo { std::vector > PHINodesToUpdate; unsigned OrigNumPHINodesToUpdate; + /// \name Exception Handling + /// \{ + /// If the current MBB is a landing pad, the exception pointer and exception /// selector registers are copied into these virtual registers by /// SelectionDAGISel::PrepareEHLandingPad(). unsigned ExceptionPointerVirtReg, ExceptionSelectorVirtReg; + /// The current call site index being processed, if any. 0 if none. + unsigned CurCallSite = 0; + // TODO: Ideally, what we'd like is to have a switch that allows emitting + // synchronous (precise at call-sites only) CFA into .eh_frame. However, + // even under this switch, we'd like .debug_frame to be precise when using + // -g. At this moment, there's no way to specify that some CFI directives + // go into .eh_frame only, while others go into .debug_frame only.
+ + /// Set the call site currently being processed. + void setCurrentCallSite(unsigned Site) { CurCallSite = Site; } + + /// Get the call site currently being processed, if any. Return zero if none. + unsigned getCurrentCallSite() { return CurCallSite; } + /// Collection of dbg.declare instructions handled after argument /// lowering and before ISel proper. SmallPtrSet PreprocessedDbgDeclares; diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h index dfa0e993ec06a..f054c56bb641c 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -99,20 +99,6 @@ class MachineModuleInfo { /// want. MachineModuleInfoImpl *ObjFileMMI; - /// \name Exception Handling - /// \{ - - /// The current call site index being processed, if any. 0 if none. - unsigned CurCallSite = 0; - - /// \} - - // TODO: Ideally, what we'd like is to have a switch that allows emitting - // synchronous (precise at call-sites only) CFA into .eh_frame. However, - // even under this switch, we'd like .debug_frame to be precise when using - // -g. At this moment, there's no way to specify that some CFI directives - // go into .eh_frame only, while others go into .debug_frame only. - /// True if debugging information is available in this module. bool DbgInfoAvailable = false; @@ -185,16 +171,6 @@ class MachineModuleInfo { /// Returns true if valid debug info is present. bool hasDebugInfo() const { return DbgInfoAvailable; } - /// \name Exception Handling - /// \{ - - /// Set the call site currently being processed. - void setCurrentCallSite(unsigned Site) { CurCallSite = Site; } - - /// Get the call site currently being processed, if any. return zero if - /// none. 
- unsigned getCurrentCallSite() { return CurCallSite; } - /// \} }; // End class MachineModuleInfo diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 150ab363c8fcd..f382df1d2a6e0 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -26,7 +26,6 @@ MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; - CurCallSite = 0; NextFnNum = 0; DbgInfoAvailable = false; } @@ -46,7 +45,6 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &) MachineFunctions(std::move(MMI.MachineFunctions)) { Context.setObjectFileInfo(TM.getObjFileLowering()); ObjFileMMI = MMI.ObjFileMMI; - CurCallSite = MMI.CurCallSite; ExternalContext = MMI.ExternalContext; TheModule = MMI.TheModule; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1791f1b503379..c554c0f5b6fd7 100644 ---
[llvm-branch-commits] [llvm] CodeGen: Remove UsesMSVCFloatingPoint from MachineModuleInfo (PR #100368)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100368 >From db429d7de96c0b5c80b015adc73a13025f93d4ad Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 17 Apr 2022 10:28:14 -0400 Subject: [PATCH 1/2] CodeGen: Remove UsesMSVCFloatingPoint from MachineModuleInfo This is only used by x86 and only used in the AsmPrinter module pass. I think implementing this by looking at the underlying IR types instead of the selected instructions is a pretty horrifying implementation, but it's still available in the AsmPrinter. This is https://reviews.llvm.org/D123933 resurrected. I still don't know what the point of emitting _fltused is, but this approach of looking at the IR types probably isn't the right way to do this in the first place. If the intent is to report any FP instructions, this will miss any implicitly introduced ones during codegen. Also don't know why just unconditionally emitting it isn't an option. The last review mentioned the ARMs might want to emit this, but I'm not going to go fix that. If someone wants to emit this on ARM, they can move this to a common helper or analysis somewhere. --- llvm/include/llvm/CodeGen/MachineModuleInfo.h | 8 -- llvm/lib/CodeGen/MachineModuleInfo.cpp| 1 - .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 27 --- llvm/lib/Target/X86/X86AsmPrinter.cpp | 25 - 4 files changed, 24 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h index 97b439c726b0a..dfa0e993ec06a 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -116,10 +116,6 @@ class MachineModuleInfo { /// True if debugging information is available in this module. bool DbgInfoAvailable = false; - /// True if this module is being built for windows/msvc, and uses floating - /// point. This is used to emit an undefined reference to _fltused.
- bool UsesMSVCFloatingPoint = false; - /// Maps IR Functions to their corresponding MachineFunctions. DenseMap> MachineFunctions; /// Next unique number available for a MachineFunction. @@ -189,10 +185,6 @@ class MachineModuleInfo { /// Returns true if valid debug info is present. bool hasDebugInfo() const { return DbgInfoAvailable; } - bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; } - - void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; } - /// \name Exception Handling /// \{ diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 088e76029f1a3..150ab363c8fcd 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -28,7 +28,6 @@ void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; CurCallSite = 0; NextFnNum = 0; - UsesMSVCFloatingPoint = false; DbgInfoAvailable = false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 401d23b22adcd..84331d257a3d0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -417,30 +417,6 @@ void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage ) const { MachineFunctionPass::getAnalysisUsage(AU); } -static void computeUsesMSVCFloatingPoint(const Triple , const Function , - MachineModuleInfo ) { - // Only needed for MSVC - if (!TT.isWindowsMSVCEnvironment()) -return; - - // If it's already set, nothing to do. 
- if (MMI.usesMSVCFloatingPoint()) -return; - - for (const Instruction &I : instructions(F)) { -if (I.getType()->isFPOrFPVectorTy()) { - MMI.setUsesMSVCFloatingPoint(true); - return; -} -for (const auto &Op : I.operands()) { - if (Op->getType()->isFPOrFPVectorTy()) { -MMI.setUsesMSVCFloatingPoint(true); -return; - } -} - } -} - PreservedAnalyses SelectionDAGISelPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { @@ -802,9 +778,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } } - // Determine if floating point is used for msvc - computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, *CurDAG->getMMI()); - // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 0c2c6bf7f8b70..9d86a9c9d1609 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -975,6
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From 49db2b2b9855d18df6449b6dedf7e50ccc1d6265 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- 3 files changed, 210 insertions(+), 199 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0b1ecc002ae25..8ae236850b982 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes , if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -737,10 +738,16 @@ 
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; })) NElts = 1; break; + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; } return LT.first * NElts * InstRate; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..b86e99558377b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100520 >From 1d17da3e7cd5253d0c7a9bb8acc5989d1e5ba615 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 
CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100519 >From 3d683da35b98db6dd0b5a94692b735765a6f776f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 +- .../Analysis/CostModel/X86/arith-overflow.ll | 120 +- 2 files changed, 96 insertions(+), 91 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index c5da46af04367..28d53042d4c21 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 148
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
arsenm wrote: ### Merge activity * **Jul 25, 4:25 PM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100522). https://github.com/llvm/llvm-project/pull/100522 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From 949edfeeecddb315bf95dd82be99c57a4711c30a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- 3 files changed, 210 insertions(+), 199 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0b1ecc002ae25..8ae236850b982 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes , if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -737,10 +738,16 @@ 
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; })) NElts = 1; break; + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; } return LT.first * NElts * InstRate; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..b86e99558377b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From 85c14e04d3e27c8609fac2890eb475963d7f008b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 + llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 40 +++--- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..e290f0631ff16 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -24,11 +24,11 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 34 for 
instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100520 >From 39ca2c43676bf82f97f8cce2e09091e7d849dfab Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 
CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100519 >From 5a2e8acf2b7e4aafae237a035f81557d97948a29 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 +- .../Analysis/CostModel/X86/arith-overflow.ll | 120 +- 2 files changed, 96 insertions(+), 91 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index c5da46af04367..28d53042d4c21 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 148
[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100514 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Fix special casing vectorization costs of saturating add/sub (PR #97463)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/97463 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100522 >From df2b6b7c749629f0ea50f7772329b48ba9450f2f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:47:03 +0400 Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics --- llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 310 + 1 file changed, 310 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll new file mode 100644 index 0..f65615b07abc0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -0,0 +1,310 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s +; END.
+ +define void @abs_nonpoison() { +; FAST-LABEL: 'abs_nonpoison' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost 
Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100522 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100522 >From df2b6b7c749629f0ea50f7772329b48ba9450f2f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:47:03 +0400 Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics --- llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 310 + 1 file changed, 310 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll new file mode 100644 index 0..f65615b07abc0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -0,0 +1,310 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s +; END.
+ +define void @abs_nonpoison() { +; FAST-LABEL: 'abs_nonpoison' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost 
Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
arsenm wrote: ping https://github.com/llvm/llvm-project/pull/96760 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100522 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100521 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100520 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100519 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack > <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100523** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100522** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513**
<a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100523 None >From ca78bfb62816c21172101c1f00dcead3efc472dc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 + llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 40 +++--- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index 133b95609bc15..623e02eb8239d 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 34 for 
instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack > <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100522** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463**
<a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100522 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack > <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite.
<a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100521 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack > <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite.
<a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100520 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100522 None >From 330c0e2bf40cf96b1c7778636fa739cb0c1a1f11 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:47:03 +0400 Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics --- llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 341 + 1 file changed, 341 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll new file mode 100644 index 0..133b95609bc15 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -0,0 +1,341 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s +; END.
+ +declare i64 @llvm.abs.i64(i64, i1 immarg) +declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1 immarg) +declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1 immarg) +declare <5 x i64> @llvm.abs.v5i64(<5 x i64>, i1 immarg) +declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1 immarg) + +declare i32 @llvm.abs.i32(i32, i1 immarg) +declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1 immarg) +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1 immarg) +declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) +declare <9 x i32> @llvm.abs.v9i32(<9 x i32>, i1 immarg) +declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg) + +declare i16 @llvm.abs.i16(i16, i1 immarg) +declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1 immarg) +declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1 immarg) +declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1 immarg) +declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg) +declare <17 x i16> @llvm.abs.v17i16(<17 x i16>, i1 immarg) +declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1 immarg) + +declare i8 @llvm.abs.i8(i8, i1 immarg) +declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1 immarg) +declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1 immarg) +declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1 immarg) +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1 immarg) +declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1 immarg) +declare <33 x i8> @llvm.abs.v33i8(<33 x i8>, i1 immarg) +declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1 immarg) + +define i32 @abs_nonpoison(i32 %arg) { +; FAST-LABEL: 'abs_nonpoison' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x 
i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call
[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack > <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a> Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100519 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100520 None >From 689ea8720d60ae6fc1226b929f5333adae1ce77c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 
CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100519 None >From c98dcbf907a6b5d085b89f06d49ee8a3bc3e9dd2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 +- .../Analysis/CostModel/X86/arith-overflow.ll | 120 +- .../CostModel/X86/intrinsic-cost-kinds.ll | 6 +- 3 files changed, 99 insertions(+), 94 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index ba745262d1890..2d907d87b057c 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16>
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100513 >From 80b236530103a66b8939aeb26f1d5c2be9043b5c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 2 Jul 2024 21:28:30 +0200 Subject: [PATCH] AMDGPU: Add baseline test for vectorize of integer min/max --- .../SLPVectorizer/AMDGPU/min_max.ll | 366 ++ 1 file changed, 366 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll new file mode 100644 index 0..47b0dbd6b2cff --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll @@ -0,0 +1,366 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s + +define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @uadd_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @uadd_sat_v2i16( +; 
GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[TMP0]] +; +; GFX9-LABEL: @uadd_sat_v2i16( +; GFX9-NEXT: bb: +; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX9-NEXT:ret <2 x i16> [[TMP0]] +; +bb: + %arg0.0 = extractelement <2 x i16> %arg0, i64 0 + %arg0.1 = extractelement <2 x i16> %arg0, i64 1 + %arg1.0 = extractelement <2 x i16> %arg1, i64 0 + %arg1.1 = extractelement <2 x i16> %arg1, i64 1 + %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0) + %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1) + %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0 + %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 + ret <2 x i16> %ins.1 +} + +define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @usub_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @usub_sat_v2i16( +; GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[TMP0]] +; +; GFX9-LABEL: @usub_sat_v2i16( +; GFX9-NEXT: bb: +; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX9-NEXT:ret <2 
x i16> [[TMP0]] +; +bb: + %arg0.0 = extractelement <2 x i16> %arg0, i64 0 + %arg0.1 = extractelement <2 x i16> %arg0, i64 1 + %arg1.0 = extractelement <2 x i16> %arg1, i64 0 + %arg1.1 = extractelement <2 x i16> %arg1, i64 1 + %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0) + %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1) + %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0 + %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 + ret <2 x i16> %ins.1 +} + +define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @sadd_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] =
[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100514 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100514 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a> Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100514 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a> Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100513 None >From 7a8f09d99fa0a90fc7fe442d87103e66ea2ff806 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 2 Jul 2024 21:28:30 +0200 Subject: [PATCH] AMDGPU: Add baseline test for vectorize of integer min/max --- .../SLPVectorizer/AMDGPU/min_max.ll | 366 ++ 1 file changed, 366 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll new file mode 100644 index 0..47b0dbd6b2cff --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll @@ -0,0 +1,366 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s + +define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @uadd_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @uadd_sat_v2i16( 
+; GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[TMP0]] +; +; GFX9-LABEL: @uadd_sat_v2i16( +; GFX9-NEXT: bb: +; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX9-NEXT:ret <2 x i16> [[TMP0]] +; +bb: + %arg0.0 = extractelement <2 x i16> %arg0, i64 0 + %arg0.1 = extractelement <2 x i16> %arg0, i64 1 + %arg1.0 = extractelement <2 x i16> %arg1, i64 0 + %arg1.1 = extractelement <2 x i16> %arg1, i64 1 + %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0) + %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1) + %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0 + %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 + ret <2 x i16> %ins.1 +} + +define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @usub_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @usub_sat_v2i16( +; GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[TMP0]] +; +; GFX9-LABEL: @usub_sat_v2i16( +; GFX9-NEXT: bb: +; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX9-NEXT:ret 
<2 x i16> [[TMP0]] +; +bb: + %arg0.0 = extractelement <2 x i16> %arg0, i64 0 + %arg0.1 = extractelement <2 x i16> %arg0, i64 1 + %arg1.0 = extractelement <2 x i16> %arg1, i64 0 + %arg1.1 = extractelement <2 x i16> %arg1, i64 1 + %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0) + %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1) + %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0 + %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 + ret <2 x i16> %ins.1 +} + +define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @sadd_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] =
[llvm-branch-commits] [llvm] CodeGen: Move current call site out of MachineModuleInfo (PR #100369)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100369 >From 4f1d8d439c2c0ff5742a98f8fe42d8212d91f556 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 17:00:00 +0400 Subject: [PATCH 1/2] CodeGen: Move current call site out of MachineModuleInfo I do not understand what this is for, but it's only used in SelectionDAGBuilder, so move it to FunctionLoweringInfo like other function scope DAG builder state. The intrinsics are not documented in the LangRef or Intrinsics.td. This removes the last piece of codegen state from MachineModuleInfo. --- .../llvm/CodeGen/FunctionLoweringInfo.h | 17 + llvm/include/llvm/CodeGen/MachineModuleInfo.h | 24 --- llvm/lib/CodeGen/MachineModuleInfo.cpp| 2 -- .../SelectionDAG/SelectionDAGBuilder.cpp | 10 4 files changed, 21 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index 45a47d7333e35..fa75d883e451c 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -183,11 +183,28 @@ class FunctionLoweringInfo { std::vector > PHINodesToUpdate; unsigned OrigNumPHINodesToUpdate; + /// \name Exception Handling + /// \{ + /// If the current MBB is a landing pad, the exception pointer and exception /// selector registers are copied into these virtual registers by /// SelectionDAGISel::PrepareEHLandingPad(). unsigned ExceptionPointerVirtReg, ExceptionSelectorVirtReg; + /// The current call site index being processed, if any. 0 if none. + unsigned CurCallSite = 0; + // TODO: Ideally, what we'd like is to have a switch that allows emitting + // synchronous (precise at call-sites only) CFA into .eh_frame. However, + // even under this switch, we'd like .debug_frame to be precise when using + // -g. At this moment, there's no way to specify that some CFI directives + // go into .eh_frame only, while others go into .debug_frame only. 
+ + /// Set the call site currently being processed. + void setCurrentCallSite(unsigned Site) { CurCallSite = Site; } + + /// Get the call site currently being processed, if any. Return zero if none. + unsigned getCurrentCallSite() { return CurCallSite; } + /// Collection of dbg.declare instructions handled after argument /// lowering and before ISel proper. SmallPtrSet PreprocessedDbgDeclares; diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h index f69be67ee9f17..310cc4b2abb77 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -99,20 +99,6 @@ class MachineModuleInfo { /// want. MachineModuleInfoImpl *ObjFileMMI; - /// \name Exception Handling - /// \{ - - /// The current call site index being processed, if any. 0 if none. - unsigned CurCallSite = 0; - - /// \} - - // TODO: Ideally, what we'd like is to have a switch that allows emitting - // synchronous (precise at call-sites only) CFA into .eh_frame. However, - // even under this switch, we'd like .debug_frame to be precise when using - // -g. At this moment, there's no way to specify that some CFI directives - // go into .eh_frame only, while others go into .debug_frame only. - /// Maps IR Functions to their corresponding MachineFunctions. DenseMap> MachineFunctions; /// Next unique number available for a MachineFunction. @@ -179,16 +165,6 @@ class MachineModuleInfo { return const_cast(this)->getObjFileInfo(); } - /// \name Exception Handling - /// \{ - - /// Set the call site currently being processed. - void setCurrentCallSite(unsigned Site) { CurCallSite = Site; } - - /// Get the call site currently being processed, if any. return zero if - /// none. 
- unsigned getCurrentCallSite() { return CurCallSite; } - /// \} }; // End class MachineModuleInfo diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 23de726a2ab97..26b38ceec393c 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -36,7 +36,6 @@ MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; - CurCallSite = 0; NextFnNum = 0; } @@ -55,7 +54,6 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &) MachineFunctions(std::move(MMI.MachineFunctions)) { Context.setObjectFileInfo(TM.getObjFileLowering()); ObjFileMMI = MMI.ObjFileMMI; - CurCallSite = MMI.CurCallSite; ExternalContext = MMI.ExternalContext; TheModule = MMI.TheModule; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 57a483a5a57ce..c554c0f5b6fd7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++
[llvm-branch-commits] [llvm] CodeGen: Remove UsesMSVCFloatingPoint from MachineModuleInfo (PR #100368)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100368 >From 8991fa261a7705f99ac5729b6bbb1cfeb53e1263 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 17 Apr 2022 10:28:14 -0400 Subject: [PATCH 1/2] CodeGen: Remove UsesMSVCFloatingPoint from MachineModuleInfo This is only used by x86 and only used in the AsmPrinter module pass. I think implementing this by looking at the underlying IR types instead of the selected instructions is a pretty horrifying implementation, but it's still available in the AsmPrinter. This is https://reviews.llvm.org/D123933 resurrected. I still don't know what the point of emitting _fltused is, but this approach of looking at the IR types probably isn't the right way to do this in the first place. If the intent is to report any FP instructions, this will miss any implicitly introduced ones during codegen. Also don't know why just unconditionally emitting it isn't an option. The last review mentioned the ARMs might want to emit this, but I'm not going to go fix that. If someone wants to emit this on ARM, they can move this to a common helper or analysis somewhere. --- llvm/include/llvm/CodeGen/MachineModuleInfo.h | 8 -- llvm/lib/CodeGen/MachineModuleInfo.cpp| 1 - .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 27 --- llvm/lib/Target/X86/X86AsmPrinter.cpp | 25 - 4 files changed, 24 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h index b39db93b021b5..f69be67ee9f17 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -113,10 +113,6 @@ class MachineModuleInfo { // -g. At this moment, there's no way to specify that some CFI directives // go into .eh_frame only, while others go into .debug_frame only. - /// True if this module is being built for windows/msvc, and uses floating - /// point. This is used to emit an undefined reference to _fltused. 
- bool UsesMSVCFloatingPoint = false; - /// Maps IR Functions to their corresponding MachineFunctions. DenseMap> MachineFunctions; /// Next unique number available for a MachineFunction. @@ -183,10 +179,6 @@ class MachineModuleInfo { return const_cast(this)->getObjFileInfo(); } - bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; } - - void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; } - /// \name Exception Handling /// \{ diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 12dec288b3ce2..23de726a2ab97 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -38,7 +38,6 @@ void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; CurCallSite = 0; NextFnNum = 0; - UsesMSVCFloatingPoint = false; } void MachineModuleInfo::finalize() { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 401d23b22adcd..84331d257a3d0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -417,30 +417,6 @@ void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage ) const { MachineFunctionPass::getAnalysisUsage(AU); } -static void computeUsesMSVCFloatingPoint(const Triple , const Function , - MachineModuleInfo ) { - // Only needed for MSVC - if (!TT.isWindowsMSVCEnvironment()) -return; - - // If it's already set, nothing to do. 
- if (MMI.usesMSVCFloatingPoint()) -return; - - for (const Instruction : instructions(F)) { -if (I.getType()->isFPOrFPVectorTy()) { - MMI.setUsesMSVCFloatingPoint(true); - return; -} -for (const auto : I.operands()) { - if (Op->getType()->isFPOrFPVectorTy()) { -MMI.setUsesMSVCFloatingPoint(true); -return; - } -} - } -} - PreservedAnalyses SelectionDAGISelPass::run(MachineFunction , MachineFunctionAnalysisManager ) { @@ -802,9 +778,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction ) { } } - // Determine if floating point is used for msvc - computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, *CurDAG->getMMI()); - // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 0c2c6bf7f8b70..9d86a9c9d1609 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -975,6
[llvm-branch-commits] [llvm] CodeGen: Remove MachineModuleInfo reference from MachineFunction (PR #100357)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100357 >From 8aff4bdfa99b782379a5383af548c4250605ed63 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 20 Jul 2024 14:24:23 +0400 Subject: [PATCH 1/3] CodeGen: Remove MachineModuleInfo reference from MachineFunction This avoids another unserializable field. Move the DbgInfoAvailable field into the AsmPrinter, which is only really a cache/convenience bit for checking a direct IR module metadata check. --- llvm/include/llvm/CodeGen/AsmPrinter.h | 6 ++ llvm/include/llvm/CodeGen/MachineFunction.h| 18 -- llvm/include/llvm/CodeGen/MachineModuleInfo.h | 6 -- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 17 - llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 4 ++-- .../CodeGen/AsmPrinter/DebugHandlerBase.cpp| 4 ++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- llvm/lib/CodeGen/MachineFunction.cpp | 12 ++-- llvm/lib/CodeGen/MachineFunctionAnalysis.cpp | 2 +- llvm/lib/CodeGen/MachineModuleInfo.cpp | 5 + .../SelectionDAG/SelectionDAGBuilder.cpp | 6 +++--- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 4 +--- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 12 ++-- llvm/tools/llvm-reduce/ReducerWorkItem.cpp | 2 +- .../CodeGen/AArch64SelectionDAGTest.cpp| 4 ++-- llvm/unittests/CodeGen/InstrRefLDVTest.cpp | 2 +- llvm/unittests/CodeGen/MFCommon.inc| 3 ++- .../SelectionDAGAddressAnalysisTest.cpp| 2 +- .../CodeGen/SelectionDAGPatternMatchTest.cpp | 2 +- .../AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp | 3 ++- llvm/unittests/Target/AMDGPU/PALMetadata.cpp | 2 +- .../Target/RISCV/RISCVInstrInfoTest.cpp| 2 +- 22 files changed, 57 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index f57be39076a78..36d1b47973870 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -225,6 +225,9 @@ class AsmPrinter : public MachineFunctionPass { /// split stack prologue. 
bool HasNoSplitStack = false; + /// True if debugging information is available in this module. + bool DbgInfoAvailable = false; + protected: explicit AsmPrinter(TargetMachine , std::unique_ptr Streamer); @@ -430,6 +433,9 @@ class AsmPrinter : public MachineFunctionPass { /// Get the CFISection type for the module. CFISection getModuleCFISectionType() const { return ModuleCFISection; } + /// Returns true if valid debug info is present. + bool hasDebugInfo() const { return DbgInfoAvailable; } + bool needsSEHMoves(); /// Since emitting CFI unwind information is entangled with supporting the diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 6e7292abeddbb..142570b9ce551 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -260,7 +260,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { const LLVMTargetMachine const TargetSubtargetInfo *STI; MCContext - MachineModuleInfo // RegInfo - Information about each register in use in the function. MachineRegisterInfo *RegInfo; @@ -395,15 +394,15 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { /// \} - /// Clear all the members of this MachineFunction, but the ones used - /// to initialize again the MachineFunction. - /// More specifically, this deallocates all the dynamically allocated - /// objects and get rid of all the XXXInfo data structure, but keep - /// unchanged the references to Fn, Target, MMI, and FunctionNumber. + /// Clear all the members of this MachineFunction, but the ones used to + /// initialize again the MachineFunction. More specifically, this deallocates + /// all the dynamically allocated objects and get rid of all the XXXInfo data + /// structure, but keep unchanged the references to Fn, Target, and + /// FunctionNumber. void clear(); /// Allocate and initialize the different members. /// In particular, the XXXInfo data structure. 
- /// \pre Fn, Target, MMI, and FunctionNumber are properly set. + /// \pre Fn, Target, and FunctionNumber are properly set. void init(); public: @@ -632,8 +631,8 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { const static unsigned int DebugOperandMemNumber; MachineFunction(Function , const LLVMTargetMachine , - const TargetSubtargetInfo , unsigned FunctionNum, - MachineModuleInfo ); + const TargetSubtargetInfo , MCContext , + unsigned FunctionNum); MachineFunction(const MachineFunction &) = delete; MachineFunction =(const MachineFunction &) = delete; ~MachineFunction(); @@ -665,7 +664,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { GISelChangeObserver *getObserver() const {
[llvm-branch-commits] [llvm] FastISel: Do not check for module debug info (PR #100353)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100353 >From 708970d494353c8a2e5dcf66fb4fc0554132d518 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 15:53:01 +0400 Subject: [PATCH] FastISel: Do not check for module debug info I don't see the point of this check and SelectionDAG does not perform it. In the normal usecase, if there's no debug info the debug intrinsics would not be there in the first place. --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 16 1 file changed, 16 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index ef9f783355190..e255bbaa92b16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1203,11 +1203,6 @@ void FastISel::handleDbgInfo(const Instruction *II) { if (DbgLabelRecord *DLR = dyn_cast()) { assert(DLR->getLabel() && "Missing label"); - if (!FuncInfo.MF->getMMI().hasDebugInfo()) { -LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n"); -continue; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL)) .addMetadata(DLR->getLabel()); @@ -1402,12 +1397,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast(II); assert(DI->getVariable() && "Missing variable"); -if (!FuncInfo.MF->getMMI().hasDebugInfo()) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI -<< " (!hasDebugInfo)\n"); - return true; -} - if (FuncInfo.PreprocessedDbgDeclares.contains(DI)) return true; @@ -1446,11 +1435,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::dbg_label: { const DbgLabelInst *DI = cast(II); assert(DI->getLabel() && "Missing label"); -if (!FuncInfo.MF->getMMI().hasDebugInfo()) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); - return true; -} - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 
TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100333 >From 0d0fe0051af3a7a4c11195042d85c8a5caae80b5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 13:11:04 +0400 Subject: [PATCH] DebugInfo: Avoid some MMI::hasDebugInfo checks I assume getSubprogram will do the correct thing in hasDebugInfo, and this is redundant with the debug_compile_units distance check. This is in preparation for removing the field. --- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 6c70c47de8822..ed99eb3c459e5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { Ty->getTag() == dwarf::DW_TAG_unspecified_type; } -static bool hasDebugInfo(const MachineModuleInfo *MMI, - const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) -return false; +static bool hasDebugInfo(const MachineFunction *MF) { auto *SP = MF->getFunction().getSubprogram(); if (!SP) return false; @@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI, void DebugHandlerBase::beginFunction(const MachineFunction *MF) { PrevInstBB = nullptr; - if (!Asm || !hasDebugInfo(MMI, MF)) { + if (!Asm || !hasDebugInfo(MF)) { skippedNonDebugFunction(); return; } @@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() { } void DebugHandlerBase::endFunction(const MachineFunction *MF) { - if (Asm && hasDebugInfo(MMI, MF)) + if (Asm && hasDebugInfo(MF)) endFunctionImpl(MF); DbgValues.clear(); DbgLabels.clear(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5f1f315c5ab24..fbce7e92b7781 100644 --- 
a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1148,14 +1148,15 @@ sortGlobalExprs(SmallVectorImpl ) { void DwarfDebug::beginModule(Module *M) { DebugHandlerBase::beginModule(M); - if (!Asm || !MMI->hasDebugInfo()) + if (!Asm) return; unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), M->debug_compile_units_end()); + if (NumDebugCUs == 0) +return; + assert(NumDebugCUs > 0 && "Asm unexpectedly initialized"); - assert(MMI->hasDebugInfo() && - "DebugInfoAvailabilty unexpectedly not initialized"); SingleCU = NumDebugCUs == 1; DenseMap> GVMap; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0b654abd2814c..b4eba07afe7c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() { // Emit initial .loc debug directive for correct relocation symbol data. if (const DISubprogram *SP = MF->getFunction().getSubprogram()) { assert(SP->getUnit()); -if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo()) +if (!SP->getUnit()->isDebugDirectivesOnly()) emitInitialRawDwarfLocDirective(*MF); } } @@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module , raw_ostream , if (HasFullDebugInfo) break; } - if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo) + if (HasFullDebugInfo) O << ", debug"; O << "\n"; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Remove UsesMSVCFloatingPoint from MachineModuleInfo (PR #100368)
arsenm wrote:
> `_fltused` is used to identify the need for supporting functions and is used during the linking process. Always emitting results in code size increases and overlinking, but catching the generated ones is definitely something that we should be doing.
So really it should be looking for post-legalize calls to specific runtime libcalls.
https://github.com/llvm/llvm-project/pull/100368
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100380 >From 1b48c68a9130ebb9fb7a68752be79716fe075dad Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 1 Feb 2023 09:52:34 -0400 Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp InstCombine also should have taken care of this, but this should be helpful when the fcmp based lowering strategy tries to combine multiple tests. --- llvm/lib/CodeGen/CodeGenCommonISel.cpp| 2 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 35 - llvm/test/CodeGen/AArch64/isinf.ll| 22 ++- llvm/test/CodeGen/PowerPC/fp-classify.ll | 32 ++-- llvm/test/CodeGen/X86/is_fpclass-fp80.ll | 52 +++ llvm/test/CodeGen/X86/is_fpclass.ll | 137 +- 6 files changed, 159 insertions(+), 121 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp index f5207d8b9d124..bb09b0d1140fc 100644 --- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFP) { case fcSubnormal | fcZero | fcNan: return InvertedTest; case fcInf | fcNan: + case fcPosInf | fcNan: + case fcNegInf | fcNan: // If we're trying to use fcmp, we can take advantage of the nan check // behavior of the compare (but this is more instructions in the integer // expansion). diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ba7c89a33f604..0036c182ab9db 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8599,6 +8599,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ; ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ; +// See if we can fold an | fcNan into an unordered compare. 
+FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan; + +// Can't fold the ordered check if we're only testing for snan or qnan +// individually. +if ((FPTestMask & fcNan) != fcNan) + OrderedFPTestMask = FPTestMask; + +const bool IsOrdered = FPTestMask == OrderedFPTestMask; + if (std::optional IsCmp0 = isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction()); IsCmp0 && (isCondCodeLegalOrCustom( @@ -8618,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, return DAG.getSetCC(DL, ResultVT, Op, Op, IsInvertedFP ? ISD::SETO : ISD::SETUO); -bool IsOrderedInf = FPTestMask == fcInf; -if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) && -isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode - : UnorderedCmpOpcode, -OperandVT.getScalarType().getSimpleVT()) && -isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) { +if (OrderedFPTestMask == fcInf && +isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode + : UnorderedCmpOpcode, +OperandVT.getScalarType().getSimpleVT())) { // isinf(x) --> fabs(x) == inf SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op); SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT); return DAG.getSetCC(DL, ResultVT, Abs, Inf, - IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode); + IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode); +} + +if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) { + // isposinf(x) --> x == inf + // isneginf(x) --> x == -inf + // isposinf(x) || nan --> x u== inf + // isneginf(x) || nan --> x u== -inf + + SDValue Inf = DAG.getConstantFP( + APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL, + OperandVT); + return DAG.getSetCC(DL, ResultVT, Op, Inf, + IsOrdered ? 
OrderedCmpOpcode : UnorderedCmpOpcode); } } diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll index 834417b98743a..458bd7eeba16c 100644 --- a/llvm/test/CodeGen/AArch64/isinf.ll +++ b/llvm/test/CodeGen/AArch64/isinf.ll @@ -58,14 +58,22 @@ define i32 @replace_isinf_call_f64(double %x) { define i32 @replace_isinf_call_f128(fp128 %x) { ; CHECK-LABEL: replace_isinf_call_f128: ; CHECK: // %bb.0: -; CHECK-NEXT:str q0, [sp, #-16]! -; CHECK-NEXT:.cfi_def_cfa_offset 16 -; CHECK-NEXT:ldp x9, x8, [sp], #16 -; CHECK-NEXT:and x8, x8, #0x7fff -; CHECK-NEXT:eor x8, x8, #0x7fff -; CHECK-NEXT:orr x8, x9, x8 -; CHECK-NEXT:cmp x8, #0 +;
[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100380 >From 37ecc7b70321cdd7ed369d4bec6db50b3f112537 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 1 Feb 2023 09:52:34 -0400 Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp InstCombine also should have taken care of this, but this should be helpful when the fcmp based lowering strategy tries to combine multiple tests. --- llvm/lib/CodeGen/CodeGenCommonISel.cpp| 2 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 35 - llvm/test/CodeGen/PowerPC/fp-classify.ll | 32 ++-- llvm/test/CodeGen/X86/is_fpclass-fp80.ll | 52 +++ llvm/test/CodeGen/X86/is_fpclass.ll | 137 +- 5 files changed, 144 insertions(+), 114 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp index f5207d8b9d124..bb09b0d1140fc 100644 --- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFP) { case fcSubnormal | fcZero | fcNan: return InvertedTest; case fcInf | fcNan: + case fcPosInf | fcNan: + case fcNegInf | fcNan: // If we're trying to use fcmp, we can take advantage of the nan check // behavior of the compare (but this is more instructions in the integer // expansion). diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ba7c89a33f604..0036c182ab9db 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8599,6 +8599,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ; ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ; +// See if we can fold an | fcNan into an unordered compare. 
+FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan; + +// Can't fold the ordered check if we're only testing for snan or qnan +// individually. +if ((FPTestMask & fcNan) != fcNan) + OrderedFPTestMask = FPTestMask; + +const bool IsOrdered = FPTestMask == OrderedFPTestMask; + if (std::optional IsCmp0 = isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction()); IsCmp0 && (isCondCodeLegalOrCustom( @@ -8618,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, return DAG.getSetCC(DL, ResultVT, Op, Op, IsInvertedFP ? ISD::SETO : ISD::SETUO); -bool IsOrderedInf = FPTestMask == fcInf; -if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) && -isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode - : UnorderedCmpOpcode, -OperandVT.getScalarType().getSimpleVT()) && -isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) { +if (OrderedFPTestMask == fcInf && +isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode + : UnorderedCmpOpcode, +OperandVT.getScalarType().getSimpleVT())) { // isinf(x) --> fabs(x) == inf SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op); SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT); return DAG.getSetCC(DL, ResultVT, Abs, Inf, - IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode); + IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode); +} + +if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) { + // isposinf(x) --> x == inf + // isneginf(x) --> x == -inf + // isposinf(x) || nan --> x u== inf + // isneginf(x) || nan --> x u== -inf + + SDValue Inf = DAG.getConstantFP( + APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL, + OperandVT); + return DAG.getSetCC(DL, ResultVT, Op, Inf, + IsOrdered ? 
OrderedCmpOpcode : UnorderedCmpOpcode); } } diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll index f527b3c48040e..50873f29b2936 100644 --- a/llvm/test/CodeGen/PowerPC/fp-classify.ll +++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll @@ -57,18 +57,30 @@ entry: define zeroext i1 @abs_isinfq(fp128 %x) { ; P8-LABEL: abs_isinfq: ; P8: # %bb.0: # %entry +; P8-NEXT:mflr 0 +; P8-NEXT:stdu 1, -48(1) +; P8-NEXT:std 0, 64(1) +; P8-NEXT:.cfi_def_cfa_offset 48 +; P8-NEXT:.cfi_offset lr, 16 ; P8-NEXT:xxswapd 0, 34 -; P8-NEXT:addi 3, 1, -16 -; P8-NEXT:li 5, 32767 +; P8-NEXT:addi 3, 1, 32 ; P8-NEXT:stxvd2x 0, 0, 3 -; P8-NEXT:rldic 5, 5, 48, 1 -; P8-NEXT:ld 4, -8(1) -; P8-NEXT:
[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)
arsenm wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100380?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)
* **#100380** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/100380?utm_source=stack-comment-icon)
* **#100378** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/100378?utm_source=stack-comment-icon)
* `main`
This stack of pull requests is managed by Graphite. [Learn more about stacking](https://stacking.dev/?utm_source=stack-comment).
Join @arsenm and the rest of your teammates on [![Graphite](https://static.graphite.dev/graphite-32x32-black.png) Graphite](https://graphite.dev?utm-source=stack-comment)
https://github.com/llvm/llvm-project/pull/100380
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100380 InstCombine also should have taken care of this, but this should be helpful when the fcmp based lowering strategy tries to combine multiple tests. >From c27e0441cacf32077d0c101304a0b0b3d336058c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 1 Feb 2023 09:52:34 -0400 Subject: [PATCH] DAG: Lower single infinity is.fpclass tests to fcmp InstCombine also should have taken care of this, but this should be helpful when the fcmp based lowering strategy tries to combine multiple tests. --- llvm/lib/CodeGen/CodeGenCommonISel.cpp| 2 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 35 - llvm/test/CodeGen/X86/is_fpclass-fp80.ll | 52 +++ llvm/test/CodeGen/X86/is_fpclass.ll | 137 +- 4 files changed, 122 insertions(+), 104 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp index f5207d8b9d124..bb09b0d1140fc 100644 --- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFP) { case fcSubnormal | fcZero | fcNan: return InvertedTest; case fcInf | fcNan: + case fcPosInf | fcNan: + case fcNegInf | fcNan: // If we're trying to use fcmp, we can take advantage of the nan check // behavior of the compare (but this is more instructions in the integer // expansion). diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ba7c89a33f604..0036c182ab9db 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8599,6 +8599,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ; ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ; +// See if we can fold an | fcNan into an unordered compare. 
+FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan; + +// Can't fold the ordered check if we're only testing for snan or qnan +// individually. +if ((FPTestMask & fcNan) != fcNan) + OrderedFPTestMask = FPTestMask; + +const bool IsOrdered = FPTestMask == OrderedFPTestMask; + if (std::optional IsCmp0 = isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction()); IsCmp0 && (isCondCodeLegalOrCustom( @@ -8618,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, return DAG.getSetCC(DL, ResultVT, Op, Op, IsInvertedFP ? ISD::SETO : ISD::SETUO); -bool IsOrderedInf = FPTestMask == fcInf; -if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) && -isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode - : UnorderedCmpOpcode, -OperandVT.getScalarType().getSimpleVT()) && -isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) { +if (OrderedFPTestMask == fcInf && +isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode + : UnorderedCmpOpcode, +OperandVT.getScalarType().getSimpleVT())) { // isinf(x) --> fabs(x) == inf SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op); SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT); return DAG.getSetCC(DL, ResultVT, Abs, Inf, - IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode); + IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode); +} + +if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) { + // isposinf(x) --> x == inf + // isneginf(x) --> x == -inf + // isposinf(x) || nan --> x u== inf + // isneginf(x) || nan --> x u== -inf + + SDValue Inf = DAG.getConstantFP( + APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL, + OperandVT); + return DAG.getSetCC(DL, ResultVT, Op, Inf, + IsOrdered ? 
OrderedCmpOpcode : UnorderedCmpOpcode); } } diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll index 52d294ca01720..56d3ba7cd7b2b 100644 --- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll +++ b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll @@ -265,23 +265,24 @@ entry: define i1 @is_posinf_f80(x86_fp80 %x) nounwind { ; X86-LABEL: is_posinf_f80: ; X86: # %bb.0: # %entry -; X86-NEXT:movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT:movl $-2147483648, %ecx # imm = 0x8000 -; X86-NEXT:xorl {{[0-9]+}}(%esp), %ecx -; X86-NEXT:xorl $32767, %eax # imm = 0x7FFF -; X86-NEXT:orl {{[0-9]+}}(%esp), %eax -; X86-NEXT:orl %ecx,
[llvm-branch-commits] [llvm] FastISel: Do not check for module debug info (PR #100353)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/100353 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] FastISel: Do not check for module debug info (PR #100353)
arsenm wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100353?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)
* **#100353** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/100353?utm_source=stack-comment-icon)
* **#100333** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/100333?utm_source=stack-comment-icon)
* **#100319** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/100319?utm_source=stack-comment-icon)
* `main`
This stack of pull requests is managed by Graphite. [Learn more about stacking](https://stacking.dev/?utm_source=stack-comment).
Join @arsenm and the rest of your teammates on [![Graphite](https://static.graphite.dev/graphite-32x32-black.png) Graphite](https://graphite.dev?utm-source=stack-comment)
https://github.com/llvm/llvm-project/pull/100353
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] FastISel: Do not check for module debug info (PR #100353)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100353 I don't see the point of this check and SelectionDAG does not perform it. In the normal usecase, if there's no debug info the debug intrinsics would not be there in the first place. >From 1e3fdb2a58c6709e4d69fd0facdfdec6916802be Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 15:53:01 +0400 Subject: [PATCH] FastISel: Do not check for module debug info I don't see the point of this check and SelectionDAG does not perform it. In the normal usecase, if there's no debug info the debug intrinsics would not be there in the first place. --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 16 1 file changed, 16 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index ef9f783355190..e255bbaa92b16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1203,11 +1203,6 @@ void FastISel::handleDbgInfo(const Instruction *II) { if (DbgLabelRecord *DLR = dyn_cast()) { assert(DLR->getLabel() && "Missing label"); - if (!FuncInfo.MF->getMMI().hasDebugInfo()) { -LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n"); -continue; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL)) .addMetadata(DLR->getLabel()); @@ -1402,12 +1397,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast(II); assert(DI->getVariable() && "Missing variable"); -if (!FuncInfo.MF->getMMI().hasDebugInfo()) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI -<< " (!hasDebugInfo)\n"); - return true; -} - if (FuncInfo.PreprocessedDbgDeclares.contains(DI)) return true; @@ -1446,11 +1435,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::dbg_label: { const DbgLabelInst *DI = cast(II); assert(DI->getLabel() && "Missing label"); -if 
(!FuncInfo.MF->getMMI().hasDebugInfo()) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); - return true; -} - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100333 >From 6367409181a47493058cede13de0b623d59e4b45 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 13:11:04 +0400 Subject: [PATCH] DebugInfo: Avoid some MMI::hasDebugInfo checks I assume getSubprogram will do the correct thing in hasDebugInfo, and this is redundant with the debug_compile_units distance check. This is in preparation for removing the field. --- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 6c70c47de8822..ed99eb3c459e5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { Ty->getTag() == dwarf::DW_TAG_unspecified_type; } -static bool hasDebugInfo(const MachineModuleInfo *MMI, - const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) -return false; +static bool hasDebugInfo(const MachineFunction *MF) { auto *SP = MF->getFunction().getSubprogram(); if (!SP) return false; @@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI, void DebugHandlerBase::beginFunction(const MachineFunction *MF) { PrevInstBB = nullptr; - if (!Asm || !hasDebugInfo(MMI, MF)) { + if (!Asm || !hasDebugInfo(MF)) { skippedNonDebugFunction(); return; } @@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() { } void DebugHandlerBase::endFunction(const MachineFunction *MF) { - if (Asm && hasDebugInfo(MMI, MF)) + if (Asm && hasDebugInfo(MF)) endFunctionImpl(MF); DbgValues.clear(); DbgLabels.clear(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5f1f315c5ab24..fbce7e92b7781 100644 --- 
a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1148,14 +1148,15 @@ sortGlobalExprs(SmallVectorImpl ) { void DwarfDebug::beginModule(Module *M) { DebugHandlerBase::beginModule(M); - if (!Asm || !MMI->hasDebugInfo()) + if (!Asm) return; unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), M->debug_compile_units_end()); + if (NumDebugCUs == 0) +return; + assert(NumDebugCUs > 0 && "Asm unexpectedly initialized"); - assert(MMI->hasDebugInfo() && - "DebugInfoAvailabilty unexpectedly not initialized"); SingleCU = NumDebugCUs == 1; DenseMap> GVMap; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0b654abd2814c..b4eba07afe7c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() { // Emit initial .loc debug directive for correct relocation symbol data. if (const DISubprogram *SP = MF->getFunction().getSubprogram()) { assert(SP->getUnit()); -if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo()) +if (!SP->getUnit()->isDebugDirectivesOnly()) emitInitialRawDwarfLocDirective(*MF); } } @@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module , raw_ostream , if (HasFullDebugInfo) break; } - if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo) + if (HasFullDebugInfo) O << ", debug"; O << "\n"; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100333 >From 6367409181a47493058cede13de0b623d59e4b45 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 13:11:04 +0400 Subject: [PATCH 1/2] DebugInfo: Avoid some MMI::hasDebugInfo checks I assume getSubprogram will do the correct thing in hasDebugInfo, and this is redundant with the debug_compile_units distance check. This is in preparation for removing the field. --- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 6c70c47de8822..ed99eb3c459e5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { Ty->getTag() == dwarf::DW_TAG_unspecified_type; } -static bool hasDebugInfo(const MachineModuleInfo *MMI, - const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) -return false; +static bool hasDebugInfo(const MachineFunction *MF) { auto *SP = MF->getFunction().getSubprogram(); if (!SP) return false; @@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI, void DebugHandlerBase::beginFunction(const MachineFunction *MF) { PrevInstBB = nullptr; - if (!Asm || !hasDebugInfo(MMI, MF)) { + if (!Asm || !hasDebugInfo(MF)) { skippedNonDebugFunction(); return; } @@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() { } void DebugHandlerBase::endFunction(const MachineFunction *MF) { - if (Asm && hasDebugInfo(MMI, MF)) + if (Asm && hasDebugInfo(MF)) endFunctionImpl(MF); DbgValues.clear(); DbgLabels.clear(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5f1f315c5ab24..fbce7e92b7781 100644 
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1148,14 +1148,15 @@ sortGlobalExprs(SmallVectorImpl ) { void DwarfDebug::beginModule(Module *M) { DebugHandlerBase::beginModule(M); - if (!Asm || !MMI->hasDebugInfo()) + if (!Asm) return; unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), M->debug_compile_units_end()); + if (NumDebugCUs == 0) +return; + assert(NumDebugCUs > 0 && "Asm unexpectedly initialized"); - assert(MMI->hasDebugInfo() && - "DebugInfoAvailabilty unexpectedly not initialized"); SingleCU = NumDebugCUs == 1; DenseMap> GVMap; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0b654abd2814c..b4eba07afe7c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() { // Emit initial .loc debug directive for correct relocation symbol data. if (const DISubprogram *SP = MF->getFunction().getSubprogram()) { assert(SP->getUnit()); -if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo()) +if (!SP->getUnit()->isDebugDirectivesOnly()) emitInitialRawDwarfLocDirective(*MF); } } @@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module , raw_ostream , if (HasFullDebugInfo) break; } - if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo) + if (HasFullDebugInfo) O << ", debug"; O << "\n"; >From 1e3fdb2a58c6709e4d69fd0facdfdec6916802be Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 15:53:01 +0400 Subject: [PATCH 2/2] FastISel: Do not check for module debug info I don't see the point of this check and SelectionDAG does not perform it. In the normal use case, if there's no debug info the debug intrinsics would not be there in the first place. 
--- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 16 1 file changed, 16 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index ef9f783355190..e255bbaa92b16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1203,11 +1203,6 @@ void FastISel::handleDbgInfo(const Instruction *II) { if (DbgLabelRecord *DLR = dyn_cast()) { assert(DLR->getLabel() && "Missing label"); - if (!FuncInfo.MF->getMMI().hasDebugInfo()) { -LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n"); -continue; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL)) .addMetadata(DLR->getLabel()); @@ -1402,12 +1397,6 @@ bool
[llvm-branch-commits] [llvm] DebugInfo: Avoid some MMI::hasDebugInfo checks (PR #100333)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100333 >From 442532e0d50039d0bb3603520d361b2ee4b4a1b5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jul 2024 13:11:04 +0400 Subject: [PATCH] DebugInfo: Avoid some MMI::hasDebugInfo checks I assume getSubprogram will do the correct thing in hasDebugInfo, and this is redundant with the debug_compile_units distance check. This is in preparation for removing the field. --- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 9 +++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp| 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 6c70c47de8822..ed99eb3c459e5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { Ty->getTag() == dwarf::DW_TAG_unspecified_type; } -static bool hasDebugInfo(const MachineModuleInfo *MMI, - const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) -return false; +static bool hasDebugInfo(const MachineFunction *MF) { auto *SP = MF->getFunction().getSubprogram(); if (!SP) return false; @@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI, void DebugHandlerBase::beginFunction(const MachineFunction *MF) { PrevInstBB = nullptr; - if (!Asm || !hasDebugInfo(MMI, MF)) { + if (!Asm || !hasDebugInfo(MF)) { skippedNonDebugFunction(); return; } @@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() { } void DebugHandlerBase::endFunction(const MachineFunction *MF) { - if (Asm && hasDebugInfo(MMI, MF)) + if (Asm && hasDebugInfo(MF)) endFunctionImpl(MF); DbgValues.clear(); DbgLabels.clear(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5f1f315c5ab24..fbce7e92b7781 100644 --- 
a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1148,14 +1148,15 @@ sortGlobalExprs(SmallVectorImpl ) { void DwarfDebug::beginModule(Module *M) { DebugHandlerBase::beginModule(M); - if (!Asm || !MMI->hasDebugInfo()) + if (!Asm) return; unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), M->debug_compile_units_end()); + if (NumDebugCUs == 0) +return; + assert(NumDebugCUs > 0 && "Asm unexpectedly initialized"); - assert(MMI->hasDebugInfo() && - "DebugInfoAvailabilty unexpectedly not initialized"); SingleCU = NumDebugCUs == 1; DenseMap> GVMap; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0b654abd2814c..b4eba07afe7c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() { // Emit initial .loc debug directive for correct relocation symbol data. if (const DISubprogram *SP = MF->getFunction().getSubprogram()) { assert(SP->getUnit()); -if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo()) +if (!SP->getUnit()->isDebugDirectivesOnly()) emitInitialRawDwarfLocDirective(*MF); } } @@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module , raw_ostream , if (HasFullDebugInfo) break; } - if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo) + if (HasFullDebugInfo) O << ", debug"; O << "\n"; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits