https://github.com/zatrazz updated https://github.com/llvm/llvm-project/pull/194632
>From 231370b90ebc31baa7c3f2226e2a239ea013c82b Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <[email protected]> Date: Thu, 9 Apr 2026 16:24:34 -0300 Subject: [PATCH 1/8] [AArch64] Add lowering for the _CountTrailingZeros/_CountTrailingZeros64 MS intrinsics Lower to llvm.cttz with is_zero_undef=false, following the same pattern as _CountLeadingZeros. _CountTrailingZeros64 truncates the i64 cttz result to i32 since the count is at most 63. Documented at: https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180 --- clang/include/clang/Basic/BuiltinsAArch64.td | 2 ++ .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 4 +++- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 13 +++++++++++ clang/lib/Headers/intrin.h | 2 ++ .../test/CodeGen/arm64-microsoft-intrinsics.c | 23 +++++++++++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td index ba30e344911aa..6607fb1926065 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.td +++ b/clang/include/clang/Basic/BuiltinsAArch64.td @@ -394,6 +394,8 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", def _CountLeadingZeros64 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned long long int)">; def _CountOneBits : AArch64NoPrefixTargetLibBuiltin<"unsigned int (msuint32_t)">; def _CountOneBits64 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned long long int)">; + def _CountTrailingZeros : AArch64NoPrefixTargetLibBuiltin<"unsigned int (msuint32_t)">; + def _CountTrailingZeros64: AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned long long int)">; } let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", Header = "intrin.h" in { diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 562f66d4cca16..05ae340448033 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2140,7 +2140,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, } if (builtinID == AArch64::BI_CountOneBits || - builtinID == AArch64::BI_CountOneBits64) { + builtinID == AArch64::BI_CountOneBits64 || + builtinID == AArch64::BI_CountTrailingZeros || + builtinID == AArch64::BI_CountTrailingZeros64) { cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented AArch64 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index f8990ced2a577..eba4f4539a0f1 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -5235,6 +5235,19 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Result; } + if (BuiltinID == AArch64::BI_CountTrailingZeros || + BuiltinID == AArch64::BI_CountTrailingZeros64) { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = ArgValue->getType(); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + + // MSVC leaves 0 undefined; use false for predictable codegen + Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getInt1(false)}); + if (BuiltinID == AArch64::BI_CountTrailingZeros64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } + if (BuiltinID == AArch64::BI__prefetch) { Value *Address = EmitScalarExpr(E->getArg(0)); Value *RW = llvm::ConstantInt::get(Int32Ty, 0); diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index e5d08a217e05e..b90e340d2d766 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -441,6 +441,8 @@ unsigned int _CountLeadingSigns(long); unsigned int _CountLeadingSigns64(__int64); unsigned int _CountOneBits(unsigned long); unsigned int _CountOneBits64(unsigned __int64); +unsigned int _CountTrailingZeros(unsigned long); +unsigned int 
_CountTrailingZeros64(unsigned __int64); unsigned int __hlt(unsigned int, ...); diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index 2f5ab50d6c848..4e0cabc5e11cd 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -584,6 +584,29 @@ unsigned int check_CountOneBits64(unsigned __int64 arg1) { // CHECK-MSCOMPAT: ret i32 %[[VAR2]] // CHECK-LINUX: error: call to undeclared function '_CountOneBits64' +unsigned int check_CountTrailingZeros(unsigned LONG arg1) { + return _CountTrailingZeros(arg1); +} + +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i32 @llvm.cttz.i32(i32 %[[VAR0]], i1 false) +// CHECK-MSCOMPAT: ret i32 %[[VAR1]] +// CHECK-LINUX: error: call to undeclared function '_CountTrailingZeros' + +unsigned int check_CountTrailingZeros64(unsigned __int64 arg1) { + return _CountTrailingZeros64(arg1); +} + +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i64 @llvm.cttz.i64(i64 %[[VAR0]], i1 false) +// CHECK-MSCOMPAT: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32 +// CHECK-MSCOMPAT: ret i32 %[[VAR2]] +// CHECK-LINUX: error: call to undeclared function '_CountTrailingZeros64' + void check__prefetch(void *arg1) { return __prefetch(arg1); } >From 8c106cb45ac3094d82f4784df58fcbc1236ed4f9 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <[email protected]> Date: Sat, 18 Apr 2026 08:37:49 -0300 Subject: [PATCH 2/8] [AArch64] Add ARM64_SYSREG, ARM64_FPCR, and ARM64_FPSR to arm64intr.h These macros let Windows AArch64 programs construct system-register encodings for use 
with the existing _ReadStatusReg/_WriteStatusReg intrinsics. ARM64_FPCR and ARM64_FPSR are the most commonly used predefined constants. The ARM64_SYSREG macro only supports op0=2 or 3 (the op0 MSB is implicit in the encoding). --- clang/lib/Headers/arm64intr.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h index 4943b2db69d02..53a3d57a6e9d1 100644 --- a/clang/lib/Headers/arm64intr.h +++ b/clang/lib/Headers/arm64intr.h @@ -15,6 +15,16 @@ #ifndef __ARM64INTR_H #define __ARM64INTR_H +/* Encode an AArch64 system register for use with + _ReadStatusReg/_WriteStatusReg. op0 must be 2 or 3; only the low bit is + stored. */ +#define ARM64_SYSREG(op0, op1, CRn, CRm, op2) \ + ((((op0) & 0x1) << 14) | (((op1) & 0x7) << 11) | (((CRn) & 0xF) << 7) | \ + (((CRm) & 0xF) << 3) | ((op2) & 0x7)) + +#define ARM64_FPCR ARM64_SYSREG(3, 3, 4, 4, 0) +#define ARM64_FPSR ARM64_SYSREG(3, 3, 4, 4, 1) + typedef enum { _ARM64_BARRIER_SY = 0xF, >From 0fe063dd125e2d460ddc935b901ed9c3ce80b051 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <[email protected]> Date: Thu, 30 Apr 2026 16:52:08 -0300 Subject: [PATCH 3/8] [AArch64] Use llvm.read_volatile_register for the __getReg MS intrinsic llvm.read_register carries IntrReadMem, which allows the compiler to CSE repeated reads or eliminate the result if it goes unused. llvm.read_volatile_register carries IntrHasSideEffects, which prevents both. The __getReg targets hardware registers (e.g. x18, the platform thread-pointer on Windows AArch64) whose values can change between calls or that must be observed even if the result is discarded. 
--- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 2 +- clang/test/CodeGen/arm64-microsoft-intrinsics.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index eba4f4539a0f1..c88313f7b53e7 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -4824,7 +4824,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Function *F = - CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty}); + CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty}); return Builder.CreateCall(F, Metadata); } diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index 4e0cabc5e11cd..79c5a8c823224 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -154,8 +154,8 @@ unsigned __int64 check__getReg(void) { return reg; } -// CHECK-MSCOMPAT: call i64 @llvm.read_register.i64(metadata ![[MD2:.*]]) -// CHECK-MSCOMPAT: call i64 @llvm.read_register.i64(metadata ![[MD3:.*]]) +// CHECK-MSCOMPAT: call i64 @llvm.read_volatile_register.i64(metadata ![[MD2:.*]]) +// CHECK-MSCOMPAT: call i64 @llvm.read_volatile_register.i64(metadata ![[MD3:.*]]) #ifdef __LP64__ #define LONG __int32 >From b4a84ecd9e3e06edf7ad7969e0b748dd0146708a Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <[email protected]> Date: Thu, 30 Apr 2026 20:22:59 -0300 Subject: [PATCH 4/8] [IR] Add llvm.write_volatile_register intrinsic Add llvm.write_volatile_register as the write-side counterpart to llvm.read_volatile_register. The default MemoryEffects::unknown() prevents the optimizer from reordering or eliminating the call. 
Unlike llvm.write_register, the SelectionDAG lowering additionally emits a FAKE_USE of the target physical register directly (via RegisterSDNode) after the WRITE_REGISTER node. This marks the register live and prevents the backend from dead-eliminating the write. It is preferred over a READ_REGISTER node, which would emit extra register copies (e.g. fmov xN, dN for FP/SIMD registers). The primary use case is lowering MS-compat __setReg and __setRegFp intrinsics on AArch64, where the target register may be in a different register class (integer vs. FP/SIMD) from the source value. --- llvm/docs/LangRef.rst | 47 +++++++---- llvm/include/llvm/IR/Intrinsics.td | 2 + .../SelectionDAG/SelectionDAGBuilder.cpp | 29 +++++++ llvm/lib/IR/Verifier.cpp | 8 ++ .../AArch64/write-volatile-register.ll | 77 +++++++++++++++++++ 5 files changed, 147 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/write-volatile-register.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ee6dd32e5e852..199c7c6cd6347 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15248,9 +15248,10 @@ called). .. _int_read_register: .. _int_read_volatile_register: .. _int_write_register: +.. 
_int_write_volatile_register: -'``llvm.read_register``', '``llvm.read_volatile_register``', and '``llvm.write_register``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.read_register``', '``llvm.read_volatile_register``', '``llvm.write_register``', and '``llvm.write_volatile_register``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" @@ -15263,26 +15264,30 @@ Syntax: declare i64 @llvm.read_volatile_register.i64(metadata) declare void @llvm.write_register.i32(metadata, i32 @value) declare void @llvm.write_register.i64(metadata, i64 @value) + declare void @llvm.write_volatile_register.i32(metadata, i32 @value) + declare void @llvm.write_volatile_register.i64(metadata, i64 @value) !0 = !{!"sp\00"} Overview: """"""""" -The '``llvm.read_register``', '``llvm.read_volatile_register``', and -'``llvm.write_register``' intrinsics provide access to the named register. -The register must be valid on the architecture being compiled to. The type -needs to be compatible with the register being read. +The '``llvm.read_register``', '``llvm.read_volatile_register``', +'``llvm.write_register``', and '``llvm.write_volatile_register``' intrinsics +provide access to the named register. The register must be valid on the +architecture being compiled to. The type needs to be compatible with the +register being accessed. Semantics: """""""""" The '``llvm.read_register``' and '``llvm.read_volatile_register``' intrinsics return the current value of the register, where possible. The -'``llvm.write_register``' intrinsic sets the current value of the register, -where possible. +'``llvm.write_register``' and '``llvm.write_volatile_register``' intrinsics +set the current value of the register, where possible. 
-A call to '``llvm.read_volatile_register``' is assumed to have side-effects -and possibly return a different value each time (e.g., for a timer register). +A call to '``llvm.read_volatile_register``' or +'``llvm.write_volatile_register``' is assumed to have side-effects and will +not be reordered or eliminated by the optimizer. This is useful to implement named register global variables that need to always be mapped to a specific register, as is common practice on @@ -15290,12 +15295,22 @@ bare-metal programs including OS kernels. The compiler doesn't check for register availability or use of the used register in surrounding code, including inline assembly. Because of that, -allocatable registers are not supported. - -Warning: So far it only works with the stack pointer on selected -architectures (ARM, AArch64, PowerPC and x86_64). Significant amount of -work is needed to support other registers and even more so, allocatable -registers. +allocatable registers are not supported by '``llvm.read_register``', +'``llvm.read_volatile_register``', or '``llvm.write_register``'. + +'``llvm.write_volatile_register``' supports allocatable registers. Writing +to an allocatable register means the value is copied into that physical +register at the point of the call; the register may subsequently be +reused by the register allocator for other purposes. The backend emits a +``FAKE_USE`` of the physical register after the write to prevent the store +from being dead-eliminated before register allocation. + +Warning: Register support is target-specific. The IR-level verifier does +not validate register names; an unsupported name results in a fatal error +during code generation. Supported registers vary by target and can be +found in each target's ``getRegisterByName`` implementation. +'``llvm.write_volatile_register``' support for allocatable registers is +currently only implemented on AArch64. .. 
_int_stacksave: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 993ddd7e33701..3e26fe406cebf 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -913,6 +913,8 @@ def int_read_register : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_metadata_ [IntrReadMem], "llvm.read_register">; def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty], [IntrNoCallback], "llvm.write_register">; +def int_write_volatile_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty], + [], "llvm.write_volatile_register">; def int_read_volatile_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrHasSideEffects], "llvm.read_volatile_register">; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 579bff7d3ab60..f1e8d80175da5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6692,6 +6692,35 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, RegName, getValue(RegValue))); return; } + case Intrinsic::write_volatile_register: { + Value *Reg = I.getArgOperand(0); + Value *RegValue = I.getArgOperand(1); + SDValue Chain = getRoot(); + const MDNode *MD = cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()); + SDValue RegName = DAG.getMDNode(MD); + EVT VT = TLI.getValueType(DAG.getDataLayout(), RegValue->getType()); + SDValue WriteChain = DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, + Chain, RegName, getValue(RegValue)); + // FAKE_USE of the physical register marks it live after the WRITE_REGISTER, + // preventing the backend from dead-eliminating the write. This is + // preferred over READ_REGISTER, which would emit extra register copies + // (e.g. fmov xN, dN for FP/SIMD registers). + const MDString *RegStr = cast<MDString>(MD->getOperand(0)); + LLT Ty = VT.isSimple() ? 
getLLTForMVT(VT.getSimpleVT()) : LLT(); + const MachineFunction &MF = DAG.getMachineFunction(); + Register PhysReg = + TLI.getRegisterByName(RegStr->getString().data(), Ty, MF); + if (PhysReg.isValid()) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); + MVT RegVT = *TRI->legalclasstypes_begin(*RC); + DAG.setRoot(DAG.getNode(ISD::FAKE_USE, sdl, MVT::Other, + {WriteChain, DAG.getRegister(PhysReg, RegVT)})); + } else { + DAG.setRoot(WriteChain); + } + return; + } case Intrinsic::memcpy: case Intrinsic::memcpy_inline: { const auto &MCI = cast<MemCpyInst>(I); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 9c6c5f245ff0b..79bd3dc984b9d 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -7360,6 +7360,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "llvm.sponentry must return a pointer to the stack", &Call); break; } + case Intrinsic::write_volatile_register: { + auto *MD = cast<MDNode>( + cast<MetadataAsValue>(Call.getArgOperand(0))->getMetadata()); + Check(MD->getNumOperands() == 1 && isa<MDString>(MD->getOperand(0)), + "llvm.write_volatile_register metadata must be a single MDString", + &Call); + break; + } }; // Verify that there aren't any unmediated control transfers between funclets. diff --git a/llvm/test/CodeGen/AArch64/write-volatile-register.ll b/llvm/test/CodeGen/AArch64/write-volatile-register.ll new file mode 100644 index 0000000000000..ad8292905583a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/write-volatile-register.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 -fast-isel=0 -global-isel=false < %s | FileCheck %s + +; Tests for llvm.write_volatile_register on AArch64. 
+; +; The volatile variant has unknown memory effects (no intrinsic attributes) and, +; unlike llvm.write_register, adds a FAKE_USE of the register after WRITE_REGISTER. +; The FAKE_USE prevents the CopyToReg from being dead-eliminated while avoiding +; an extra cross-domain read (fmov xN, dM) that would arise if we kept the +; register live via READ_REGISTER + CopyFromReg instead. +; +; For Windows-specific tests with reserved GP register x18 see +; clang/test/CodeGen/arm64-microsoft-intrinsics.c. + +; -- Stack pointer ----------------------------------------------------------- +; sp is the canonical GP test: it is always accessible regardless of ABI. + +define void @write_volatile_sp(i64 %val) { +; CHECK-LABEL: write_volatile_sp: +; CHECK: // %bb.0: +; CHECK-NEXT: mov sp, x0 +; CHECK-NEXT: // fake_use: $sp +; CHECK-NEXT: ret + call void @llvm.write_volatile_register.i64(metadata !0, i64 %val) + ret void +} + +; -- FP/SIMD d-registers: integer bit-pattern -> FP register ----------------- +; +; The caller passes an i64 bit-pattern (e.g. obtained from read_volatile_register). +; Writing it into a SIMD d-register requires a cross-domain integer->FP move +; (fmov dN, xM). The FAKE_USE ensures the CopyToReg is not dead-eliminated +; even though dN has no further GP-domain uses in this function. 
+ +define void @write_volatile_d5(i64 %bits) { +; CHECK-LABEL: write_volatile_d5: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d5, x0 +; CHECK-NEXT: // fake_use: $d5 +; CHECK-NEXT: ret + call void @llvm.write_volatile_register.i64(metadata !1, i64 %bits) + ret void +} + +define void @write_volatile_d31(i64 %bits) { +; CHECK-LABEL: write_volatile_d31: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d31, x0 +; CHECK-NEXT: // fake_use: $d31 +; CHECK-NEXT: ret + call void @llvm.write_volatile_register.i64(metadata !2, i64 %bits) + ret void +} + +; -- Back-to-back writes must both survive ------------------------------------ +; +; Each call has unknown memory effects so neither may be suppressed on behalf +; of the other. Verify that both fmov + fake_use pairs appear in order. + +define void @write_volatile_d5_twice(i64 %a, i64 %b) { +; CHECK-LABEL: write_volatile_d5_twice: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d5, x0 +; CHECK-NEXT: // fake_use: $d5 +; CHECK-NEXT: fmov d5, x1 +; CHECK-NEXT: // fake_use: $d5 +; CHECK-NEXT: ret + call void @llvm.write_volatile_register.i64(metadata !1, i64 %a) + call void @llvm.write_volatile_register.i64(metadata !1, i64 %b) + ret void +} + +declare void @llvm.write_volatile_register.i64(metadata, i64) + +!0 = !{!"sp"} +!1 = !{!"d5"} +!2 = !{!"d31"} >From 03ea3dcff7b5598ac708989b1815029eaac39094 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <[email protected]> Date: Sat, 18 Apr 2026 09:13:27 -0300 Subject: [PATCH 5/8] [AArch64] Add the __setReg MS intrinsic for writing GP registers by index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __setReg(n, v) writes a 64-bit value into the GP register xN (x0–x28 or sp for n=31). The lowering reuses the __getReg register-name computation and emits llvm.write_volatile_register to prevent the store from being dead-eliminated by the backend. 
Documented at: https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180 --- clang/include/clang/Basic/BuiltinsAArch64.td | 1 + clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 3 ++- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 17 +++++++++++++---- clang/lib/Headers/intrin.h | 1 + clang/lib/Sema/SemaARM.cpp | 2 +- clang/test/CodeGen/arm64-microsoft-intrinsics.c | 13 +++++++++++++ 6 files changed, 31 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td index 6607fb1926065..5f6eda3e25418 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.td +++ b/clang/include/clang/Basic/BuiltinsAArch64.td @@ -335,6 +335,7 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", Header = "intrin.h" in { def _ReadWriteBarrier : AArch64NoPrefixTargetLibBuiltin<"void ()">; def __getReg : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (int)">; + def __setReg : AArch64NoPrefixTargetLibBuiltin<"void (int, unsigned long long int)">; def _ReadStatusReg : AArch64NoPrefixTargetLibBuiltin<"long long int (int)">; def _WriteStatusReg : AArch64NoPrefixTargetLibBuiltin<"void (int, long long int)">; def __sys : AArch64NoPrefixTargetLibBuiltin<"unsigned int (int, long long int)">; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 05ae340448033..bac78d5eefc86 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -1930,7 +1930,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return mlir::Value{}; } - if (builtinID == clang::AArch64::BI__getReg) { + if (builtinID == clang::AArch64::BI__getReg || + builtinID == clang::AArch64::BI__setReg) { cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented AArch64 builtin call: 
") + getContext().BuiltinInfo.getName(builtinID)); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index c88313f7b53e7..d66e5b554b483 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -4810,7 +4810,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return CI; } - if (BuiltinID == clang::AArch64::BI__getReg) { + if (BuiltinID == clang::AArch64::BI__getReg || + BuiltinID == clang::AArch64::BI__setReg) { Expr::EvalResult Result; if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); @@ -4823,9 +4824,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); - llvm::Function *F = - CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty}); - return Builder.CreateCall(F, Metadata); + CallInst *CI; + if (BuiltinID == clang::AArch64::BI__getReg) { + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty}); + CI = Builder.CreateCall(F, Metadata); + } else { + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::write_volatile_register, {Int64Ty}); + CI = Builder.CreateCall(F, {Metadata, EmitScalarExpr(E->getArg(1))}); + } + return CI; } if (BuiltinID == clang::AArch64::BI__break) { diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index b90e340d2d766..4815d536dbe46 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -377,6 +377,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __nop(void) { \*----------------------------------------------------------------------------*/ #if defined(__aarch64__) || defined(__arm64ec__) unsigned __int64 __getReg(int); +void __setReg(int, unsigned __int64); unsigned char _interlockedbittestandreset_acq(long volatile *, long); unsigned char 
_interlockedbittestandreset_nf(long volatile *, long); unsigned char _interlockedbittestandreset_rel(long volatile *, long); diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index f57c9c8b87cd5..16a93ad41ced5 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1174,7 +1174,7 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, if (BuiltinID == AArch64::BI__sys) return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 0x3fff); - if (BuiltinID == AArch64::BI__getReg) + if (BuiltinID == AArch64::BI__getReg || BuiltinID == AArch64::BI__setReg) return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 31); if (BuiltinID == AArch64::BI__break) diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index 79c5a8c823224..3aa0aa312d269 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -157,6 +157,19 @@ unsigned __int64 check__getReg(void) { // CHECK-MSCOMPAT: call i64 @llvm.read_volatile_register.i64(metadata ![[MD2:.*]]) // CHECK-MSCOMPAT: call i64 @llvm.read_volatile_register.i64(metadata ![[MD3:.*]]) +void test__setReg(unsigned __int64 v) +{ + __setReg(18, v); + __setReg(31, v); +} + +// CHECK-MSCOMPAT-LABEL: define{{.*}}void @test__setReg(i64{{.*}}%v){{.*}}{ +// CHECK-MSCOMPAT: %[[DATA_ADDR1:.*]] = load i64, ptr %v.addr, align 8 +// CHECK-MSCOMPAT: call void @llvm.write_volatile_register.i64(metadata ![[MD2]], i64 %[[DATA_ADDR1]]) +// CHECK-MSCOMPAT: %[[DATA_ADDR2:.*]] = load i64, ptr %v.addr, align 8 +// CHECK-MSCOMPAT: call void @llvm.write_volatile_register.i64(metadata ![[MD3]], i64 %[[DATA_ADDR2]]) +// CHECK-LINUX: error: call to undeclared function '__setReg' + #ifdef __LP64__ #define LONG __int32 #else >From fbbea5e9fd0afc599ce0e36715f7b5277de1b728 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <[email protected]> Date: Sun, 19 Apr 2026 09:41:12 -0300 Subject: [PATCH 6/8] [AArch64] 
Add the __getRegFp MS intrinsic for reading FP/SIMD d-registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __getRegFp(n) reads d-register dN (0–31) by index and returns its contents as a double. The register is read as an i64 through llvm.read_volatile_register and the result is bitcast to double, so repeated reads are neither merged nor eliminated. The write-side counterpart, __setRegFp, is added by the next patch in the series. Documented at: https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180 --- clang/include/clang/Basic/BuiltinsAArch64.td | 1 + .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 3 +- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 19 ++++++++++++ clang/lib/Headers/intrin.h | 1 + clang/lib/Sema/SemaARM.cpp | 3 +- .../test/CodeGen/arm64-microsoft-intrinsics.c | 17 +++++++++++ llvm/test/CodeGen/AArch64/read-fp-reg.ll | 29 +++++++++++++++++++ 7 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/read-fp-reg.ll diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td index 5f6eda3e25418..c9d8fc1d94ed2 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.td +++ b/clang/include/clang/Basic/BuiltinsAArch64.td @@ -335,6 +335,7 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", Header = "intrin.h" in { def _ReadWriteBarrier : AArch64NoPrefixTargetLibBuiltin<"void ()">; def __getReg : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (int)">; + def __getRegFp : AArch64NoPrefixTargetLibBuiltin<"double (int)">; def __setReg : AArch64NoPrefixTargetLibBuiltin<"void (int, unsigned long long int)">; def _ReadStatusReg : AArch64NoPrefixTargetLibBuiltin<"long long int (int)">; def _WriteStatusReg : AArch64NoPrefixTargetLibBuiltin<"void (int, long long int)">; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index bac78d5eefc86..303fa4efae1cd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -1931,7 +1931,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, } if (builtinID == clang::AArch64::BI__getReg || - builtinID == clang::AArch64::BI__setReg) { + builtinID == clang::AArch64::BI__setReg || + builtinID == clang::AArch64::BI__getRegFp) { cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented AArch64 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index d66e5b554b483..ebaf317a94e56 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -4837,6 +4837,25 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return CI; } + if (BuiltinID == clang::AArch64::BI__getRegFp) { + Expr::EvalResult Result; + if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) + llvm_unreachable("Sema will ensure that the parameter is constant"); + + llvm::APSInt Value = Result.Val.getInt(); + LLVMContext &Context = CGM.getLLVMContext(); + std::string Reg = "d" + toString(Value, 10); + + llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)}; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty}); + llvm::Value *Bits = Builder.CreateCall(F, Metadata); + return Builder.CreateBitCast(Bits, llvm::Type::getDoubleTy(Context)); + } + if (BuiltinID == clang::AArch64::BI__break) { Expr::EvalResult Result; if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index 4815d536dbe46..2a0b7d5922ac2 100644 --- 
a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -377,6 +377,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __nop(void) { \*----------------------------------------------------------------------------*/ #if defined(__aarch64__) || defined(__arm64ec__) unsigned __int64 __getReg(int); +double __getRegFp(int _Reg); void __setReg(int, unsigned __int64); unsigned char _interlockedbittestandreset_acq(long volatile *, long); unsigned char _interlockedbittestandreset_nf(long volatile *, long); diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index 16a93ad41ced5..215d0daa3fb02 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1174,7 +1174,8 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, if (BuiltinID == AArch64::BI__sys) return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 0x3fff); - if (BuiltinID == AArch64::BI__getReg || BuiltinID == AArch64::BI__setReg) + if (BuiltinID == AArch64::BI__getReg || BuiltinID == AArch64::BI__setReg || + BuiltinID == AArch64::BI__getRegFp) return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 31); if (BuiltinID == AArch64::BI__break) diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index 3aa0aa312d269..1a1e81a26a714 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -170,6 +170,21 @@ void test__setReg(unsigned __int64 v) // CHECK-MSCOMPAT: call void @llvm.write_volatile_register.i64(metadata ![[MD3]], i64 %[[DATA_ADDR2]]) // CHECK-LINUX: error: call to undeclared function '__setReg' +double test__getRegFp(void) +{ + double volatile reg; + reg = __getRegFp(5); + reg = __getRegFp(31); + return reg; +} + +// CHECK-MSCOMPAT-LABEL: define{{.*}}double @test__getRegFp(){{.*}}{ +// CHECK-MSCOMPAT: [[BITS:%.*]] = call i64 @llvm.read_volatile_register.i64(metadata ![[MD4:.*]]) +// CHECK-MSCOMPAT: bitcast i64 [[BITS]] to double +// 
CHECK-MSCOMPAT: [[BITS:%.*]] = call i64 @llvm.read_volatile_register.i64(metadata ![[MD5:.*]]) +// CHECK-MSCOMPAT: bitcast i64 [[BITS]] to double +// CHECK-LINUX: error: call to undeclared function '__getRegFp' + #ifdef __LP64__ #define LONG __int32 #else @@ -633,3 +648,5 @@ void check__prefetch(void *arg1) { // CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"} // CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"} +// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"} +// CHECK-MSCOMPAT: ![[MD5]] = !{!"d31"} diff --git a/llvm/test/CodeGen/AArch64/read-fp-reg.ll b/llvm/test/CodeGen/AArch64/read-fp-reg.ll new file mode 100644 index 0000000000000..853f1fc051716 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/read-fp-reg.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64 -fast-isel=0 -global-isel=false < %s | FileCheck %s + +define double @test_getRegFp_d5() { +; CHECK-LABEL: test_getRegFp_d5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, d5 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.read_volatile_register.i64(metadata !0) + %1 = bitcast i64 %0 to double + ret double %1 +} + +define double @test_getRegFp_d31() { +; CHECK-LABEL: test_getRegFp_d31: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, d31 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.read_volatile_register.i64(metadata !1) + %1 = bitcast i64 %0 to double + ret double %1 +} + +declare i64 @llvm.read_volatile_register.i64(metadata) + +!0 = !{!"d5"} +!1 = !{!"d31"} >From 422d8dc494489b94ae03ab91175f362b8f27bb84 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <[email protected]> Date: Sun, 19 Apr 2026 10:03:37 -0300 Subject: [PATCH 7/8] [AArch64] Add support for the __setRegFp MS intrinsic The builtin writes to a hardware floating-point register using an integer index. 
It is ARM64 specific and it is documented at: <https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180> --- clang/include/clang/Basic/BuiltinsAArch64.td | 1 + .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 3 ++- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 21 ++++++++++++++----- clang/lib/Headers/intrin.h | 1 + clang/lib/Sema/SemaARM.cpp | 2 +- .../test/CodeGen/arm64-microsoft-intrinsics.c | 12 +++++++++++ 6 files changed, 33 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td index c9d8fc1d94ed2..873051720606c 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.td +++ b/clang/include/clang/Basic/BuiltinsAArch64.td @@ -336,6 +336,7 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", def _ReadWriteBarrier : AArch64NoPrefixTargetLibBuiltin<"void ()">; def __getReg : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (int)">; def __getRegFp : AArch64NoPrefixTargetLibBuiltin<"double (int)">; + def __setRegFp : AArch64NoPrefixTargetLibBuiltin<"void (int, double)">; def __setReg : AArch64NoPrefixTargetLibBuiltin<"void (int, unsigned long long int)">; def _ReadStatusReg : AArch64NoPrefixTargetLibBuiltin<"long long int (int)">; def _WriteStatusReg : AArch64NoPrefixTargetLibBuiltin<"void (int, long long int)">; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 303fa4efae1cd..e36f46f700d63 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -1932,7 +1932,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, if (builtinID == clang::AArch64::BI__getReg || builtinID == clang::AArch64::BI__setReg || - builtinID == clang::AArch64::BI__getRegFp) { + builtinID == clang::AArch64::BI__getRegFp || + builtinID == clang::AArch64::BI__setRegFp) { cgm.errorNYI(expr->getSourceRange(), 
std::string("unimplemented AArch64 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index ebaf317a94e56..9d8d9d7d97b49 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -4837,7 +4837,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return CI; } - if (BuiltinID == clang::AArch64::BI__getRegFp) { + if (BuiltinID == clang::AArch64::BI__getRegFp || + BuiltinID == clang::AArch64::BI__setRegFp) { Expr::EvalResult Result; if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); @@ -4850,10 +4851,20 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); - llvm::Function *F = - CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty}); - llvm::Value *Bits = Builder.CreateCall(F, Metadata); - return Builder.CreateBitCast(Bits, llvm::Type::getDoubleTy(Context)); + llvm::Value *Ret; + if (BuiltinID == clang::AArch64::BI__getRegFp) { + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty}); + llvm::Value *Bits = Builder.CreateCall(F, Metadata); + Ret = Builder.CreateBitCast(Bits, llvm::Type::getDoubleTy(Context)); + } else { + llvm::Value *Val = EmitScalarExpr(E->getArg(1)); + llvm::Value *Bits = Builder.CreateBitCast(Val, Int64Ty); + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::write_volatile_register, {Int64Ty}); + Ret = Builder.CreateCall(F, {Metadata, Bits}); + } + return Ret; } if (BuiltinID == clang::AArch64::BI__break) { diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index 2a0b7d5922ac2..7692d289fe90d 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -379,6 +379,7 @@ static __inline__ 
void __DEFAULT_FN_ATTRS __nop(void) { unsigned __int64 __getReg(int); double __getRegFp(int _Reg); void __setReg(int, unsigned __int64); +void __setRegFp(int, double); unsigned char _interlockedbittestandreset_acq(long volatile *, long); unsigned char _interlockedbittestandreset_nf(long volatile *, long); unsigned char _interlockedbittestandreset_rel(long volatile *, long); diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index 215d0daa3fb02..62f6d7296192d 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1175,7 +1175,7 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 0x3fff); if (BuiltinID == AArch64::BI__getReg || BuiltinID == AArch64::BI__setReg || - BuiltinID == AArch64::BI__getRegFp) + BuiltinID == AArch64::BI__getRegFp || BuiltinID == AArch64::BI__setRegFp) return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 31); if (BuiltinID == AArch64::BI__break) diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index 1a1e81a26a714..e20751806a60b 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -185,6 +185,18 @@ double test__getRegFp(void) // CHECK-MSCOMPAT: bitcast i64 [[BITS]] to double // CHECK-LINUX: error: call to undeclared function '__getRegFp' +void test__setRegFp(double v) +{ + __setRegFp(5, v); + __setRegFp(31, v); +} +// CHECK-MSCOMPAT-LABEL: define{{.*}}void @test__setRegFp(double{{.*}}%v){{.*}}{ +// CHECK-MSCOMPAT: [[BITS:%.*]] = bitcast double {{.*}} to i64 +// CHECK-MSCOMPAT: call void @llvm.write_volatile_register.i64(metadata ![[MD4:.*]], i64 [[BITS]]) +// CHECK-MSCOMPAT: [[BITS:%.*]] = bitcast double {{.*}} to i64 +// CHECK-MSCOMPAT: call void @llvm.write_volatile_register.i64(metadata ![[MD5:.*]], i64 [[BITS]]) +// CHECK-LINUX: error: call to undeclared function '__setRegFp' + #ifdef __LP64__ #define 
LONG __int32 #else >From 4d3377d8acd9a0676ad0a049f088de934a522b91 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <[email protected]> Date: Mon, 20 Apr 2026 13:11:40 -0300 Subject: [PATCH 8/8] [AArch64] Add the __prefetch2 MS intrinsic __prefetch2(ptr, hint) issues a PRFM instruction with an explicit 5-bit opcode. The hint encodes the PRFM opcode directly: bits[4:3]=type (PLD/PLI/PST), bits[2:1]=target (L1/L2/L3), bit[0]=policy (KEEP/STRM). The argument must be a compile-time constant in [0, 31]. Neither the MSVC headers nor the documentation define named constants for building the hint; the caller is expected to construct the 5-bit field directly from the AArch64 PRFM encoding. Documented at: https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180 --- clang/include/clang/Basic/BuiltinsAArch64.td | 1 + .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 3 ++- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 20 +++++++++++++++++++ clang/lib/Headers/intrin.h | 1 + clang/lib/Sema/SemaARM.cpp | 3 +++ .../test/CodeGen/arm64-microsoft-intrinsics.c | 10 ++++++++++ 6 files changed, 37 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td index 873051720606c..15257f3db5b41 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.td +++ b/clang/include/clang/Basic/BuiltinsAArch64.td @@ -403,6 +403,7 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", Header = "intrin.h" in { def __prefetch : AArch64NoPrefixTargetLibBuiltin<"void (void const *)">; + def __prefetch2 : AArch64NoPrefixTargetLibBuiltin<"void (void const *, unsigned char)">; } let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", Header = "intrin.h" in { diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 
e36f46f700d63..50f7e4ca82713 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2152,7 +2152,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return mlir::Value{}; } - if (builtinID == AArch64::BI__prefetch) { + if (builtinID == AArch64::BI__prefetch || + builtinID == AArch64::BI__prefetch2) { cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented AArch64 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 9d8d9d7d97b49..d9ed09277fa0d 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -5296,6 +5296,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Address, RW, Locality, Data}); } + if (BuiltinID == AArch64::BI__prefetch2) { + Value *Address = EmitScalarExpr(E->getArg(0)); + llvm::APSInt PrfOp = E->getArg(1)->EvaluateKnownConstInt(CGM.getContext()); + // Decode 5-bit PRFM encoding: bits[4:3]=type, bits[2:1]=target, + // bit[0]=policy + // type: PLD=0(load), PLI=1(instr), PST=2(store) + // target: L1=0, L2=1, L3=2 + // policy: KEEP=0, STRM=1 + uint64_t Op = PrfOp.getZExtValue(); + uint64_t Type = (Op >> 3) & 0x3; + uint64_t Target = (Op >> 1) & 0x3; + uint64_t Policy = Op & 0x1; + Value *RW = Builder.getInt32(Type == 2 ? 1 : 0); + Value *Local = Builder.getInt32(Target); + Value *IsStream = Builder.getInt32(Policy); + Value *IsData = Builder.getInt32(Type == 1 ? 
0 : 1); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_prefetch); + return Builder.CreateCall(F, {Address, RW, Local, IsStream, IsData}); + } + if (BuiltinID == AArch64::BI__hlt) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt); Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index 7692d289fe90d..4cb8cac960bcf 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -450,6 +450,7 @@ unsigned int _CountTrailingZeros64(unsigned __int64); unsigned int __hlt(unsigned int, ...); void __cdecl __prefetch(const void *); +void __cdecl __prefetch2(const void *, unsigned char); #endif diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index 62f6d7296192d..d78baab661701 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1178,6 +1178,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, BuiltinID == AArch64::BI__getRegFp || BuiltinID == AArch64::BI__setRegFp) return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 31); + if (BuiltinID == AArch64::BI__prefetch2) + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 31); + if (BuiltinID == AArch64::BI__break) return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 0xffff); diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index e20751806a60b..e6a415a0d8805 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -657,6 +657,16 @@ void check__prefetch(void *arg1) { // CHECK-MSCOMPAT: call void @llvm.prefetch.p0(ptr %[[VAR0]], i32 0, i32 3, i32 1) // CHECK-MSCOMPAT: ret void +void check__prefetch2(void *arg1) { + __prefetch2(arg1, 0x00); + __prefetch2(arg1, 0x13); +} + +// CHECK-MSCOMPAT-LABEL: define{{.*}}void @check__prefetch2(ptr{{.*}}%arg1){{.*}}{ +// CHECK-MSCOMPAT: call void @llvm.aarch64.prefetch(ptr %{{.*}}, i32 0, i32 0, i32 0, i32 1) 
+// CHECK-MSCOMPAT: call void @llvm.aarch64.prefetch(ptr %{{.*}}, i32 1, i32 1, i32 1, i32 1) +// CHECK-LINUX: error: call to undeclared function '__prefetch2' + // CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"} // CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
