llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

---
Full diff: https://github.com/llvm/llvm-project/pull/96876.diff

2 Files Affected:

- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+15-27)
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl (+12-6)

``````````diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4bbb4375ee997..a3115bfa4d230 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18655,32 +18655,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
     return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
-    Intrinsic::ID IID;
-    llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
-    switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
-      IID = Intrinsic::amdgcn_global_atomic_fmin;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-      IID = Intrinsic::amdgcn_global_atomic_fmax;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-      IID = Intrinsic::amdgcn_flat_atomic_fmin;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
-      IID = Intrinsic::amdgcn_flat_atomic_fmax;
-      break;
-    }
-    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-    llvm::Function *F =
-        CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
-    return Builder.CreateCall(F, {Addr, Val});
-  }
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19054,7 +19028,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
     llvm::AtomicRMWInst::BinOp BinOp;
     switch (BuiltinID) {
     case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19080,6 +19058,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
       BinOp = llvm::AtomicRMWInst::FAdd;
       break;
+    case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
+      BinOp = llvm::AtomicRMWInst::FMin;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
+    case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
+      BinOp = llvm::AtomicRMWInst::FMax;
+      break;
     }
 
     Address Ptr = CheckAtomicAlignment(*this, E);
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index 60a3033a36c17..cfc5adc57bf5e 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) {
 }
 
 // CHECK-LABEL: test_global_global_min_f64
-// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX90A-LABEL: test_global_global_min_f64$local
 // GFX90A: global_atomic_min_f64
 void test_global_global_min_f64(__global double *addr, double x){
@@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double x){
 }
 
 // CHECK-LABEL: test_global_max_f64
-// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX90A-LABEL: test_global_max_f64$local
 // GFX90A: global_atomic_max_f64
 void test_global_max_f64(__global double *addr, double x){
@@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_min_flat_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmin ptr {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX90A-LABEL: test_flat_min_flat_f64$local
 // GFX90A: flat_atomic_min_f64
 void test_flat_min_flat_f64(__generic double *addr, double x){
@@ -74,7 +77,8 @@ void test_flat_min_flat_f64(__generic double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_global_min_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX90A: test_flat_global_min_f64$local
 // GFX90A: global_atomic_min_f64
 void test_flat_global_min_f64(__global double *addr, double x){
@@ -83,7 +87,8 @@ void test_flat_global_min_f64(__global double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_max_flat_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmax ptr {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX90A-LABEL: test_flat_max_flat_f64$local
 // GFX90A: flat_atomic_max_f64
 void test_flat_max_flat_f64(__generic double *addr, double x){
@@ -92,7 +97,8 @@ void test_flat_max_flat_f64(__generic double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_global_max_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
 // GFX90A-LABEL: test_flat_global_max_f64$local
 // GFX90A: global_atomic_max_f64
 void test_flat_global_max_f64(__global double *addr, double x){
``````````

</details>

https://github.com/llvm/llvm-project/pull/96876
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits