[clang] [llvm] clang/AMDGPU: Emit atomicrmw from ds_fadd builtins (PR #95395)

2024-06-18 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/95395


[clang] [llvm] clang/AMDGPU: Emit atomicrmw from ds_fadd builtins (PR #95395)

2024-06-18 Thread Yaxun Liu via cfe-commits

yxsamliu wrote:

These builtins now generate atomic instructions in IR, but the builtin function
names do not contain "atomic". Is that a concern? Should they be renamed to
include "atomic" in the name?
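
For context, a sketch of the user-facing side (patterned loosely after the
updated builtins-amdgcn.cu test; the function name is illustrative and the
usual CUDA/HIP attribute macros are assumed): the call keeps the non-"atomic"
builtin name, but after this change it lowers to an atomicrmw fadd on the LDS
address rather than a call to the llvm.amdgcn.ds.fadd intrinsic.

__device__ float use_ds_faddf(float src) {
  __shared__ float shared;
  // Same argument shape as the existing tests: pointer, value, memory order,
  // memory scope, isVolatile. The name says "ds_faddf", but the IR emitted
  // for it is now an atomic read-modify-write (atomicrmw fadd).
  return __builtin_amdgcn_ds_faddf(&shared, src, 0, 0, false);
}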

https://github.com/llvm/llvm-project/pull/95395


[clang] [llvm] clang/AMDGPU: Emit atomicrmw from ds_fadd builtins (PR #95395)

2024-06-18 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/95395

From 35c741fe2563094bc20c179ee9f244620025405c Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 10 Jun 2024 19:40:59 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from ds_fadd builtins

We should have done this for the f32/f64 cases a long time ago. Now that
codegen handles atomicrmw selection for the v2f16/v2bf16 cases, start emitting
atomicrmw instead.

This also upgrades the behavior to respect a volatile-qualified pointer, which
was previously ignored (for the cases that do not have an explicit volatile
argument).
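
In codegen terms, the builtins now go through the generic atomic emission path
instead of selecting the llvm.amdgcn.ds.fadd intrinsic. A minimal sketch of
that shape, written directly against the LLVM IRBuilder API (the helper name
and signature are illustrative; the actual patch goes through clang's
CGBuilder/Address helpers in CGBuiltin.cpp):

// Sketch only: build an atomicrmw fadd on an LDS pointer and propagate the
// volatile qualifier, which the old intrinsic-based lowering dropped.
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Alignment.h"

static llvm::Value *emitDSFAddSketch(llvm::IRBuilder<> &Builder,
                                     llvm::Value *Ptr, llvm::Value *Val,
                                     llvm::Align Alignment, bool IsVolatile,
                                     llvm::SyncScope::ID SSID) {
  // ds_fadd maps onto the FAdd atomicrmw operation; ordering and scope come
  // from the builtin's extra arguments where they exist, monotonic otherwise.
  llvm::AtomicRMWInst *RMW = Builder.CreateAtomicRMW(
      llvm::AtomicRMWInst::FAdd, Ptr, Val, Alignment,
      llvm::AtomicOrdering::Monotonic, SSID);
  if (IsVolatile)
    RMW->setVolatile(true); // new: respect a volatile-qualified pointer
  return RMW;
}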
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 113 +++---
 clang/test/CodeGenCUDA/builtins-amdgcn.cu |   2 +-
 .../test/CodeGenCUDA/builtins-spirv-amdgcn.cu |   2 +-
 .../builtins-unsafe-atomics-gfx90a.cu |   5 +-
 ...tins-unsafe-atomics-spirv-amdgcn-gfx90a.cu |   2 +-
 .../test/CodeGenOpenCL/builtins-amdgcn-vi.cl  |  37 +-
 .../builtins-fp-atomics-gfx12.cl  |  14 ++-
 .../CodeGenOpenCL/builtins-fp-atomics-gfx8.cl |   9 +-
 .../builtins-fp-atomics-gfx90a.cl |   4 +-
 .../builtins-fp-atomics-gfx940.cl |  10 +-
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |   3 +-
 11 files changed, 139 insertions(+), 62 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 511e1fd4016d7..d81cf40c912de 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18140,9 +18140,35 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
     break;
   }
 
+  // Some of the atomic builtins take the scope as a string name.
   StringRef scp;
-  llvm::getConstantStringInfo(Scope, scp);
-  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+  if (llvm::getConstantStringInfo(Scope, scp)) {
+    SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+    return;
+  }
+
+  // Older builtins had an enum argument for the memory scope.
+  int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+  switch (scope) {
+  case 0: // __MEMORY_SCOPE_SYSTEM
+    SSID = llvm::SyncScope::System;
+    break;
+  case 1: // __MEMORY_SCOPE_DEVICE
+    SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+    break;
+  case 2: // __MEMORY_SCOPE_WRKGRP
+    SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
+    break;
+  case 3: // __MEMORY_SCOPE_WVFRNT
+    SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+    break;
+  case 4: // __MEMORY_SCOPE_SINGLE
+    SSID = llvm::SyncScope::SingleThread;
+    break;
+  default:
+    SSID = llvm::SyncScope::System;
+    break;
+  }
 }
 
 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
@@ -18558,14 +18584,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
     return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
   case AMDGPU::BI__builtin_amdgcn_ds_fminf:
   case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
     Intrinsic::ID Intrin;
     switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
-      Intrin = Intrinsic::amdgcn_ds_fadd;
-      break;
     case AMDGPU::BI__builtin_amdgcn_ds_fminf:
       Intrin = Intrinsic::amdgcn_ds_fmin;
       break;
@@ -18656,35 +18678,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
     return Builder.CreateCall(F, {Addr, Val});
   }
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
-    Intrinsic::ID IID;
-    llvm::Type *ArgTy;
-    switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
-      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
-      ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
-      ArgTy = llvm::FixedVectorType::get(
-          llvm::Type::getHalfTy(getLLVMContext()), 2);
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    }
-    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-    llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
-        llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
-    llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
-        llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
-    llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
-    return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
-  }
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
   case 

[clang] [llvm] clang/AMDGPU: Emit atomicrmw from ds_fadd builtins (PR #95395)

2024-06-15 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/95395