https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/197618
>From f6ebebcf14ee9bc60076f1e5382cf1e6048fc2f5 Mon Sep 17 00:00:00 2001 From: jofrn <[email protected]> Date: Wed, 13 May 2026 16:03:35 -0700 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic store Vector types with 2 elements must be widened. This change widens the vector operand of atomic stores in SelectionDAG so that aligned vectors with more than one element can be translated. --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 54 +++++ llvm/test/CodeGen/X86/atomic-load-store.ll | 198 ++++++++++++++++++ 3 files changed, 253 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 9c37eb8065ba5..a1c0e68049544 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1104,6 +1104,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N); SDValue WidenVecOp_FAKE_USE(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_ATOMIC_STORE(AtomicSDNode *ST); SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c6fc5e2152528..05484d0dd7d33 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -6608,6 +6608,23 @@ static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, return LdOp; } +/// Inverse of coerceLoadedValue: pull a FirstVT-sized scalar/vector out of the +/// widened value so it can be issued in a single atomic store. 
+static SDValue coerceStoredValue(SDValue StVal, EVT FirstVT, EVT WidenVT, + TypeSize FirstVTWidth, const SDLoc &dl, + SelectionDAG &DAG) { + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { + unsigned NumElts = + WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, StVal); + return DAG.getExtractVectorElt(dl, FirstVT, VecOp, 0); + } + assert(FirstVT == WidenVT && "First value type must equal widen value type"); + return StVal; +} + static std::optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, unsigned Width, EVT WidenVT, unsigned Align, @@ -7445,6 +7462,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::ATOMIC_STORE: + Res = WidenVecOp_ATOMIC_STORE(cast<AtomicSDNode>(N)); + break; case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break; case ISD::EXPERIMENTAL_VP_STRIDED_STORE: Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo); @@ -8061,6 +8081,40 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { report_fatal_error("Unable to widen vector store"); } +SDValue DAGTypeLegalizer::WidenVecOp_ATOMIC_STORE(AtomicSDNode *ST) { + EVT StVT = ST->getMemoryVT(); + SDLoc dl(ST); + assert(StVT.isVector() && "Expected vector"); + + SDValue StVal = GetWidenedVector(ST->getVal()); + EVT WidenVT = StVal.getValueType(); + assert(WidenVT.isVector() && "Expected vector"); + assert(StVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(StVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + TypeSize StWidth = StVT.getSizeInBits(); + TypeSize WidenWidth = 
WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - StWidth; + + // Find the vector type that can store the original memory width in one + // atomic operation. + std::optional<EVT> FirstVT = + findMemType(DAG, TLI, StWidth.getKnownMinValue(), WidenVT, /*StAlign=*/0, + WidthDiff.getKnownMinValue()); + if (!FirstVT) + return SDValue(); + + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue StOp = + coerceStoredValue(StVal, *FirstVT, WidenVT, FirstVTWidth, dl, DAG); + + return DAG.getAtomic(ISD::ATOMIC_STORE, dl, *FirstVT, ST->getChain(), StOp, + ST->getBasePtr(), ST->getMemOperand()); +} + SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of vp_store"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 8619386fe3c88..659cdec91d3e7 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -352,6 +352,204 @@ define void @store_atomic_vec1_double_align(ptr %x, <1 x double> %v) nounwind { ret void } +define void @store_atomic_vec2_i8(ptr %x, <2 x i8> %v) { +; CHECK-SSE-O3-LABEL: store_atomic_vec2_i8: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movd %xmm0, %eax +; CHECK-SSE-O3-NEXT: movw %ax, (%rdi) +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec2_i8: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax +; CHECK-AVX-O3-NEXT: movw %ax, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE-O0-LABEL: store_atomic_vec2_i8: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd %xmm0, %eax +; CHECK-SSE-O0-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-SSE-O0-NEXT: movw %ax, (%rdi) +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec2_i8: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax +; CHECK-AVX-O0-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-AVX-O0-NEXT: movw %ax, (%rdi) 
+; CHECK-AVX-O0-NEXT: retq + store atomic <2 x i8> %v, ptr %x release, align 4 + ret void +} + +define void @store_atomic_vec2_i16(ptr %x, <2 x i16> %v) { +; CHECK-SSE-O3-LABEL: store_atomic_vec2_i16: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movd %xmm0, %eax +; CHECK-SSE-O3-NEXT: movl %eax, (%rdi) +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec2_i16: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax +; CHECK-AVX-O3-NEXT: movl %eax, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE-O0-LABEL: store_atomic_vec2_i16: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd %xmm0, %eax +; CHECK-SSE-O0-NEXT: movl %eax, (%rdi) +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec2_i16: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax +; CHECK-AVX-O0-NEXT: movl %eax, (%rdi) +; CHECK-AVX-O0-NEXT: retq + store atomic <2 x i16> %v, ptr %x release, align 4 + ret void +} + +define void @store_atomic_vec2_ptr270(ptr %x, <2 x ptr addrspace(270)> %v) { +; CHECK-SSE-O3-LABEL: store_atomic_vec2_ptr270: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movq %xmm0, %rax +; CHECK-SSE-O3-NEXT: movq %rax, (%rdi) +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec2_ptr270: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax +; CHECK-AVX-O3-NEXT: movq %rax, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE-O0-LABEL: store_atomic_vec2_ptr270: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movq %xmm0, %rax +; CHECK-SSE-O0-NEXT: movq %rax, (%rdi) +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec2_ptr270: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax +; CHECK-AVX-O0-NEXT: movq %rax, (%rdi) +; CHECK-AVX-O0-NEXT: retq + store atomic <2 x ptr addrspace(270)> %v, ptr %x release, align 8 + ret void +} + +define void @store_atomic_vec2_i32_align(ptr %x, <2 x i32> %v) { +; CHECK-SSE-O3-LABEL: store_atomic_vec2_i32_align: +; CHECK-SSE-O3: # %bb.0: +; 
CHECK-SSE-O3-NEXT: movq %xmm0, %rax +; CHECK-SSE-O3-NEXT: movq %rax, (%rdi) +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec2_i32_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax +; CHECK-AVX-O3-NEXT: movq %rax, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE-O0-LABEL: store_atomic_vec2_i32_align: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movq %xmm0, %rax +; CHECK-SSE-O0-NEXT: movq %rax, (%rdi) +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec2_i32_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax +; CHECK-AVX-O0-NEXT: movq %rax, (%rdi) +; CHECK-AVX-O0-NEXT: retq + store atomic <2 x i32> %v, ptr %x release, align 8 + ret void +} + +define void @store_atomic_vec2_float_align(ptr %x, <2 x float> %v) { +; CHECK-SSE-O3-LABEL: store_atomic_vec2_float_align: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movq %xmm0, %rax +; CHECK-SSE-O3-NEXT: movq %rax, (%rdi) +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec2_float_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax +; CHECK-AVX-O3-NEXT: movq %rax, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE-O0-LABEL: store_atomic_vec2_float_align: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movq %xmm0, %rax +; CHECK-SSE-O0-NEXT: movq %rax, (%rdi) +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec2_float_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax +; CHECK-AVX-O0-NEXT: movq %rax, (%rdi) +; CHECK-AVX-O0-NEXT: retq + store atomic <2 x float> %v, ptr %x release, align 8 + ret void +} + +define void @store_atomic_vec4_i8(ptr %x, <4 x i8> %v) nounwind { +; CHECK-SSE-O3-LABEL: store_atomic_vec4_i8: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movd %xmm0, %eax +; CHECK-SSE-O3-NEXT: movl %eax, (%rdi) +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec4_i8: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax +; 
CHECK-AVX-O3-NEXT: movl %eax, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE-O0-LABEL: store_atomic_vec4_i8: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd %xmm0, %eax +; CHECK-SSE-O0-NEXT: movl %eax, (%rdi) +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec4_i8: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax +; CHECK-AVX-O0-NEXT: movl %eax, (%rdi) +; CHECK-AVX-O0-NEXT: retq + store atomic <4 x i8> %v, ptr %x release, align 4 + ret void +} + +define void @store_atomic_vec4_i16(ptr %x, <4 x i16> %v) nounwind { +; CHECK-SSE-O3-LABEL: store_atomic_vec4_i16: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movq %xmm0, %rax +; CHECK-SSE-O3-NEXT: movq %rax, (%rdi) +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec4_i16: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax +; CHECK-AVX-O3-NEXT: movq %rax, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE-O0-LABEL: store_atomic_vec4_i16: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movq %xmm0, %rax +; CHECK-SSE-O0-NEXT: movq %rax, (%rdi) +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec4_i16: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax +; CHECK-AVX-O0-NEXT: movq %rax, (%rdi) +; CHECK-AVX-O0-NEXT: retq + store atomic <4 x i16> %v, ptr %x release, align 8 + ret void +} + define <2 x i8> @atomic_vec2_i8(ptr %x) { ; CHECK-SSE-O3-LABEL: atomic_vec2_i8: ; CHECK-SSE-O3: # %bb.0: _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
