https://github.com/jyknight updated
https://github.com/llvm/llvm-project/pull/74275
>From 7baffd6d1f4254b1bd725ddc883a360d79267435 Mon Sep 17 00:00:00 2001
From: James Y Knight
Date: Sat, 2 Dec 2023 23:05:26 -0500
Subject: [PATCH 1/3] [X86] Use plain load/store instead of cmpxchg16b for
atomics with AVX
In late 2021, both Intel and AMD finally documented that every
AVX-capable CPU has always been guaranteed to execute aligned 16-byte
loads/stores atomically, and further, guaranteed that all future CPUs
with AVX will do so as well.
Therefore, we may use normal SSE 128-bit load/store instructions to
implement atomics, if AVX is enabled.
Also adjust handling of unordered atomic load/store in LegalizeIntegerTypes.cpp;
currently, it hardcodes a fallback to ATOMIC_CMP_SWAP_WITH_SUCCESS,
but we should instead fallback to ATOMIC_LOAD/ATOMIC_STORE.
Per AMD64 Architecture Programmer's manual, 7.3.2 Access Atomicity:
"""
Processors that report [AVX] extend the atomicity for cacheable,
naturally-aligned single loads or stores from a quadword to a double
quadword.
"""
Per Intel's SDM:
"""
Processors that enumerate support for Intel(R) AVX guarantee that the
16-byte memory operations performed by the following instructions will
always be carried out atomically:
- MOVAPD, MOVAPS, and MOVDQA.
- VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
- VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded with
EVEX.128 and k0 (masking disabled).
"""
This was also confirmed to be true for Zhaoxin CPUs with AVX, in
https://gcc.gnu.org/PR104688
---
.../SelectionDAG/LegalizeIntegerTypes.cpp | 28 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 94 +--
llvm/test/CodeGen/X86/atomic-non-integer.ll | 24 +-
llvm/test/CodeGen/X86/atomic-unordered.ll | 83 +-
llvm/test/CodeGen/X86/atomic128.ll | 247 +++---
llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll | 8 +-
6 files changed, 256 insertions(+), 228 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 54698edce7d6f8..5b496feee7a8f4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3831,17 +3831,14 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
if (N->isAtomic()) {
-// It's typical to have larger CAS than atomic load instructions.
SDLoc dl(N);
EVT VT = N->getMemoryVT();
-SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
-SDValue Zero = DAG.getConstant(0, dl, VT);
-SDValue Swap = DAG.getAtomicCmpSwap(
-ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
-VT, VTs, N->getOperand(0),
-N->getOperand(1), Zero, Zero, N->getMemOperand());
-ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
-ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
+// We may support larger values in atomic_load than in a normal load
+// (without splitting), so switch over if needed.
+SDValue New = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, N->getOperand(0),
+N->getOperand(1), N->getMemOperand());
+ReplaceValueWith(SDValue(N, 0), New.getValue(0));
+ReplaceValueWith(SDValue(N, 1), New.getValue(1));
return;
}
@@ -5399,14 +5396,13 @@ SDValue DAGTypeLegalizer::ExpandIntOp_XINT_TO_FP(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->isAtomic()) {
-// It's typical to have larger CAS than atomic store instructions.
+// We may support larger values in atomic_store than in a normal store
+// (without splitting), so switch over if needed.
SDLoc dl(N);
-SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- N->getMemoryVT(),
- N->getOperand(0), N->getOperand(2),
- N->getOperand(1),
- N->getMemOperand());
-return Swap.getValue(1);
+SDValue New =
+DAG.getAtomic(ISD::ATOMIC_STORE, dl, N->getMemoryVT(), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2), N->getMemOperand());
+return New.getValue(0);
}
if (ISD::isNormalStore(N))
return ExpandOp_NormalStore(N, OpNo);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6167be7bdf84e9..1880cbc3a5bf35 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -515,6 +515,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.is64Bit())
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+ if (Subtarget.is64Bit() && Subtarget.hasAVX()) {
+// All CPUs supporting AVX will atomically load/store