https://github.com/jthackray created https://github.com/llvm/llvm-project/pull/191301
`__builtin_arm_atomic_store_with_stshh` must satisfy two constraints:

- preserve release/seq_cst ordering in LLVM IR
- keep `stshh` immediately adjacent to the final store in codegen

The original target-intrinsic lowering preserved the final `stshh` + store sequence, but it did not model ordering strongly enough in LLVM IR, so the optimizer could sink earlier stores across the builtin.

Fix this by inserting a `release` or `seq_cst` fence before the intrinsic call. This preserves ordering in optimized IR while still letting the backend emit the required final instruction sequence. This means we now get a `dmb ish` instruction before the `stshh` instruction.

Also relax Sema for the builtin to accept storing 8/16/32/64-bit floating-point and pointer values in addition to integers, and update the diagnostic text accordingly.

Add coverage for:

- integer, floating-point, and pointer codegen cases
- Sema acceptance/rejection cases
- optimized-IR ordering regression coverage
- AArch64 assembly checks for the final release-store sequence

>From 47107a7d2befa33d97d70c8379def3555d3af1b9 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Thu, 9 Apr 2026 20:29:29 +0100
Subject: [PATCH] [AArch64][clang] Fix `__arm_atomic_store_with_stshh` ordering
 and lowering

`__builtin_arm_atomic_store_with_stshh` must satisfy two constraints:

- preserve release/seq_cst ordering in LLVM IR
- keep `stshh` immediately adjacent to the final store in codegen

The original target-intrinsic lowering preserved the final `stshh` + store sequence, but it did not model ordering strongly enough in LLVM IR, so the optimizer could sink earlier stores across the builtin.

Fix this by inserting a `release` or `seq_cst` fence before the intrinsic call. This preserves ordering in optimized IR while still letting the backend emit the required final instruction sequence. This means we now get a `dmb ish` instruction before the `stshh` instruction.
Also relax Sema for the builtin to accept storing 8/16/32/64-bit floating-point and pointer values in addition to integers, and update the diagnostic text accordingly. Add coverage for: - integer, floating-point, and pointer codegen cases - Sema acceptance/rejection cases - optimized-IR ordering regression coverage - AArch64 assembly checks for the final release-store sequence --- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 33 +++++++++++++--- clang/lib/Sema/SemaARM.cpp | 5 ++- .../AArch64/pcdphint-atomic-store-order.c | 37 ++++++++++++++++++ .../CodeGen/AArch64/pcdphint-atomic-store.c | 38 +++++++++++++++++++ .../test/Sema/AArch64/pcdphint-atomic-store.c | 18 ++++++--- .../CodeGen/AArch64/pcdphint-atomic-store.ll | 13 +++++++ 7 files changed, 134 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/pcdphint-atomic-store-order.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6d2fae551566f..b3f54df177fd9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9600,7 +9600,7 @@ def err_atomic_builtin_pointer_size : Error< "type (%0 invalid)">; def err_arm_atomic_store_with_stshh_bad_type : Error< "address argument to '__arm_atomic_store_with_stshh' must be a pointer to an " - "8,16,32, or 64-bit integer type (%0 invalid)">; + "8,16,32, or 64-bit integer, floating-point, or pointer type (%0 invalid)">; def err_arm_atomic_store_with_stshh_bad_value_type : Error< "value argument to '__arm_atomic_store_with_stshh' must be %0; got %1">; def err_arm_atomic_store_with_stshh_bad_order : Error< diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 3e0ec2c143428..60510b2b5d2a2 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -4666,14 +4666,36 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, auto *OrderC = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); auto *PolicyC = cast<ConstantInt>(EmitScalarExpr(E->getArg(3))); - // Compute pointee bit-width from arg0 and create as i32 constant QualType ValQT = E->getArg(0)->getType()->castAs<PointerType>()->getPointeeType(); unsigned SizeBits = getContext().getTypeSize(ValQT); - auto *SizeC = llvm::ConstantInt::get(Int32Ty, SizeBits); + auto *StoreTy = Builder.getIntNTy(SizeBits); + Value *StoreBits; + if (ValQT->isIntegerType()) { + StoreBits = Builder.CreateIntCast(StoreValue, StoreTy, + ValQT->isSignedIntegerType()); + } else if (ValQT->isPointerType()) { + StoreBits = Builder.CreatePtrToInt(StoreValue, StoreTy); + } else { + assert(ValQT->isRealFloatingType() && + "unexpected type for stshh atomic store"); + StoreBits = Builder.CreateBitCast(StoreValue, StoreTy); + } - Value *StoreValue64 = Builder.CreateIntCast(StoreValue, Int64Ty, - ValQT->isSignedIntegerType()); + Value *StoreValue64 = Builder.CreateZExt(StoreBits, Int64Ty); + + switch (OrderC->getZExtValue()) { + case 0: + break; + case 3: + Builder.CreateFence(llvm::AtomicOrdering::Release); + break; + case 5: + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); + break; + default: + llvm_unreachable("unexpected memory order for stshh atomic store"); + } Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store, {StoreAddr->getType()}); @@ -4683,7 +4705,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall( F, {StoreAddr, StoreValue64, ConstantInt::get(Int32Ty, OrderC->getZExtValue()), - ConstantInt::get(Int32Ty, PolicyC->getZExtValue()), SizeC}); + ConstantInt::get(Int32Ty, PolicyC->getZExtValue()), + ConstantInt::get(Int32Ty, SizeBits)}); } if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr || diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index e54c8228e5ff8..4ca2b0603317d 100644 --- 
a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1154,7 +1154,10 @@ static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM &S, } ValType = ValType.getUnqualifiedType(); - unsigned Bits = ValType->isIntegerType() ? Context.getTypeSize(ValType) : 0; + bool IsSupportedType = ValType->isIntegerType() || + ValType->isRealFloatingType() || + ValType->isPointerType(); + unsigned Bits = IsSupportedType ? Context.getTypeSize(ValType) : 0; if (Bits != 8 && Bits != 16 && Bits != 32 && Bits != 64) { SemaRef.Diag(PointerArg->getBeginLoc(), diag::err_arm_atomic_store_with_stshh_bad_type) diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store-order.c b/clang/test/CodeGen/AArch64/pcdphint-atomic-store-order.c new file mode 100644 index 0000000000000..fc716d34c1787 --- /dev/null +++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store-order.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +v9.6a -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=IR +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +v9.6a -O2 -S -o - %s | FileCheck %s --check-prefix=ASM + +#include <arm_acle.h> + +// IR-LABEL: define dso_local void @test_release_store_ordering( +// IR: if.then: +// IR-NEXT: store i32 %v, ptr %b, align 4 +// IR-NEXT: %0 = zext i32 %v to i64 +// IR-NEXT: fence release +// IR-NEXT: tail call void @llvm.aarch64.stshh.atomic.store.p0(ptr %a, i64 %0, i32 3, i32 0, i32 32) +// IR-NEXT: br label %if.end +// +// ASM-LABEL: test_release_store_ordering: +// ASM: str w2, [x1] +// ASM-NEXT: mov w8, w2 +// ASM-NEXT: dmb ish +// ASM-NEXT: stshh keep +// ASM-NEXT: stlr w8, [x0] +void test_release_store_ordering(int *__restrict a, int *__restrict b, int v, + int c) { + if (c) { + *b = v; + __arm_atomic_store_with_stshh(a, v, __ATOMIC_RELEASE, 0); + } else { + *b = v + 1; + } +} + +// ASM-LABEL: demo_f32: +// ASM: fmov w8, s0 +// ASM-NEXT: dmb ish +// ASM-NEXT: stshh keep +// ASM-NEXT: stlr w8, [x0] +void demo_f32(float 
*p, float v) { + __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELEASE, 0); +} diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c index f48f1d6344bc5..0ee0f67a51aa8 100644 --- a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c +++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c @@ -30,6 +30,7 @@ void test_u8(unsigned char *p, unsigned char v) { // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[V_ADDR]], align 2 // CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i64 +// CHECK-NEXT: fence release // CHECK-NEXT: call void @llvm.aarch64.stshh.atomic.store.p0(ptr [[TMP0]], i64 [[TMP2]], i32 3, i32 1, i32 16) // CHECK-NEXT: ret void // @@ -47,6 +48,7 @@ void test_u16(unsigned short *p, unsigned short v) { // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V_ADDR]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: fence seq_cst // CHECK-NEXT: call void @llvm.aarch64.stshh.atomic.store.p0(ptr [[TMP0]], i64 [[TMP2]], i32 5, i32 0, i32 32) // CHECK-NEXT: ret void // @@ -69,3 +71,39 @@ void test_u32(unsigned int *p, unsigned int v) { void test_u64(unsigned long *p, unsigned long v) { __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 1); } + +// CHECK-LABEL: define dso_local void @test_f32( +// CHECK-SAME: ptr noundef [[P:%.*]], float noundef [[V:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[V_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: store float [[V]], ptr [[V_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[V_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[TMP1]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to 
i64 +// CHECK-NEXT: fence release +// CHECK-NEXT: call void @llvm.aarch64.stshh.atomic.store.p0(ptr [[TMP0]], i64 [[TMP3]], i32 3, i32 0, i32 32) +// CHECK-NEXT: ret void +// +void test_f32(float *p, float v) { + __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELEASE, 0); +} + +// CHECK-LABEL: define dso_local void @test_ptr( +// CHECK-SAME: ptr noundef [[P:%.*]], ptr noundef [[V:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +// CHECK-NEXT: call void @llvm.aarch64.stshh.atomic.store.p0(ptr [[TMP0]], i64 [[TMP2]], i32 0, i32 1, i32 64) +// CHECK-NEXT: ret void +// +void test_ptr(void **p, void *v) { + __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 1); +} diff --git a/clang/test/Sema/AArch64/pcdphint-atomic-store.c b/clang/test/Sema/AArch64/pcdphint-atomic-store.c index 5b4bf27003a5a..c6ac5b804e9b1 100644 --- a/clang/test/Sema/AArch64/pcdphint-atomic-store.c +++ b/clang/test/Sema/AArch64/pcdphint-atomic-store.c @@ -17,20 +17,28 @@ void test_const_pointer(const unsigned int *p, unsigned int v) { // expected-error@-1 {{address argument to atomic builtin cannot be const-qualified}} } -void test_non_integer_pointer(float *p, float v) { +void test_float_ok(float *p, float v) { __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0); - // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 8,16,32, or 64-bit integer type}} } -void test_invalid_bit_width(__int128 *p, __int128 v) { +void test_pointer_ok(void **p, void *v) { __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0); - // expected-error@-1 
{{address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 8,16,32, or 64-bit integer type}} } struct IncompleteType; +void test_invalid_struct_pointer(struct IncompleteType *p, int v) { + __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0); + // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 8,16,32, or 64-bit integer, floating-point, or pointer type}} +} + +void test_invalid_bit_width(__int128 *p, __int128 v) { + __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0); + // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 8,16,32, or 64-bit integer, floating-point, or pointer type}} +} + void test_incomplete_pointee(struct IncompleteType *p, int v) { __builtin_arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0); - // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 8,16,32, or 64-bit integer type}} + // expected-error@-1 {{address argument to '__arm_atomic_store_with_stshh' must be a pointer to an 8,16,32, or 64-bit integer, floating-point, or pointer type}} } void test_invalid_memory_order(unsigned int *p, unsigned int v) { diff --git a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll index 6e48cb348ca05..e7ec4325f8cac 100644 --- a/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll +++ b/llvm/test/CodeGen/AArch64/pcdphint-atomic-store.ll @@ -72,6 +72,19 @@ define void @test_keep_release_i32(ptr %p, i64 %v) { ret void } +define void @test_keep_release_f32(ptr %p, float %v) { +; CHECK-LABEL: test_keep_release_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlr w8, [x0] +; CHECK-NEXT: ret + %bits = bitcast float %v to i32 + %ext = zext i32 %bits to i64 + call void @llvm.aarch64.stshh.atomic.store.p0(ptr %p, i64 %ext, i32 3, i32 0, i32 32) + ret void +} + define void 
@test_keep_release_i64(ptr %p, i64 %v) { ; CHECK-LABEL: test_keep_release_i64: ; CHECK: // %bb.0: _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
