https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/148900
>From a9e62b36fb879b7b0278d299df64e11ba6605041 Mon Sep 17 00:00:00 2001 From: jofrn <[email protected]> Date: Tue, 15 Jul 2025 13:03:15 -0400 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. It also adds support for 128 bit lowering in tablegen to support SSE/AVX. --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 19 +- llvm/test/CodeGen/ARM/atomic-load-store.ll | 51 ++++ llvm/test/CodeGen/X86/atomic-load-store.ll | 91 +++++- .../X86/expand-atomic-non-integer.ll | 287 ++++++++++++++---- 4 files changed, 382 insertions(+), 66 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 53f1cfe24a68d..8dc14bb416345 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -483,7 +483,9 @@ LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) { NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); - Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); + Value *NewVal = LI->getType()->isPtrOrPtrVectorTy() + ? Builder.CreateIntToPtr(NewLI, LI->getType()) + : Builder.CreateBitCast(NewLI, LI->getType()); LI->replaceAllUsesWith(NewVal); LI->eraseFromParent(); return NewLI; @@ -2093,9 +2095,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; - if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); - else { + if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's <n x ptr> vector type + auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType()); + auto *VTy = dyn_cast<VectorType>(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { + unsigned AS = PtrTy->getAddressSpace(); + Value *BC = Builder.CreateBitCast( + Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); + V = Builder.CreateIntToPtr(BC, I->getType()); + } else + V = Builder.CreateBitOrPointerCast(Result, I->getType()); + } else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: ldr r0, [r0] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: ldr r0, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: movs r1, #0 +; THUMBONE-NEXT: mov r2, r1 +; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r1, #2 +; ARMV4-NEXT: bl __atomic_load_4 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: ldr r0, [r0] +; ARMV6-NEXT: mov r1, #0 +; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: ldr r0, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 00310f6d1f219..867a4acb791bc 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -244,6 +244,96 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-SSE2-O3-LABEL: atomic_vec2_ptr_align: +; CHECK-SSE2-O3: # %bb.0: +; CHECK-SSE2-O3-NEXT: pushq %rax +; CHECK-SSE2-O3-NEXT: movl $2, %esi +; CHECK-SSE2-O3-NEXT: callq __atomic_load_16@PLT +; CHECK-SSE2-O3-NEXT: movq %rdx, %xmm1 +; CHECK-SSE2-O3-NEXT: movq %rax, %xmm0 +; CHECK-SSE2-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE2-O3-NEXT: popq %rax +; CHECK-SSE2-O3-NEXT: retq +; +; CHECK-SSE4-O3-LABEL: atomic_vec2_ptr_align: +; CHECK-SSE4-O3: # %bb.0: +; CHECK-SSE4-O3-NEXT: movaps (%rdi), %xmm0 +; CHECK-SSE4-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_ptr_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE2-O0-LABEL: atomic_vec2_ptr_align: +; CHECK-SSE2-O0: # %bb.0: +; CHECK-SSE2-O0-NEXT: pushq %rax +; CHECK-SSE2-O0-NEXT: movl $2, %esi +; CHECK-SSE2-O0-NEXT: callq __atomic_load_16@PLT +; CHECK-SSE2-O0-NEXT: movq %rdx, %xmm1 +; CHECK-SSE2-O0-NEXT: movq %rax, %xmm0 +; CHECK-SSE2-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE2-O0-NEXT: popq %rax +; CHECK-SSE2-O0-NEXT: retq +; +; CHECK-SSE4-O0-LABEL: atomic_vec2_ptr_align: +; CHECK-SSE4-O0: # %bb.0: +; CHECK-SSE4-O0-NEXT: movapd (%rdi), %xmm0 +; CHECK-SSE4-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_ptr_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-SSE2-O3-LABEL: atomic_vec4_ptr270: +; CHECK-SSE2-O3: # %bb.0: +; CHECK-SSE2-O3-NEXT: pushq %rax +; CHECK-SSE2-O3-NEXT: movl $2, %esi +; CHECK-SSE2-O3-NEXT: callq __atomic_load_16@PLT +; CHECK-SSE2-O3-NEXT: movq %rdx, %xmm1 +; CHECK-SSE2-O3-NEXT: movq %rax, %xmm0 +; CHECK-SSE2-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE2-O3-NEXT: popq %rax +; CHECK-SSE2-O3-NEXT: retq +; +; CHECK-SSE4-O3-LABEL: atomic_vec4_ptr270: +; CHECK-SSE4-O3: # %bb.0: +; CHECK-SSE4-O3-NEXT: movaps (%rdi), %xmm0 +; CHECK-SSE4-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec4_ptr270: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE2-O0-LABEL: atomic_vec4_ptr270: +; CHECK-SSE2-O0: # %bb.0: +; CHECK-SSE2-O0-NEXT: pushq %rax +; CHECK-SSE2-O0-NEXT: movl $2, %esi +; CHECK-SSE2-O0-NEXT: callq __atomic_load_16@PLT +; CHECK-SSE2-O0-NEXT: movq %rdx, %xmm1 +; CHECK-SSE2-O0-NEXT: movq %rax, %xmm0 +; CHECK-SSE2-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE2-O0-NEXT: popq %rax +; CHECK-SSE2-O0-NEXT: retq +; +; CHECK-SSE4-O0-LABEL: atomic_vec4_ptr270: +; CHECK-SSE4-O0: # %bb.0: +; CHECK-SSE4-O0-NEXT: movapd (%rdi), %xmm0 +; CHECK-SSE4-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec4_ptr270: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16 + ret <4 x ptr addrspace(270)> %ret +} define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align: @@ -727,7 +817,6 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind { } define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { -; ; CHECK-SSE2-O3-LABEL: atomic_vec4_float_align: ; CHECK-SSE2-O3: # %bb.0: ; CHECK-SSE2-O3-NEXT: pushq %rax diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll index 84c7df120e32f..17d99c6459604 100644 --- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll +++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 -; RUN: opt -S %s -passes=atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s +; RUN: opt -S %s -passes=atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK64 +; RUN: opt -S %s -passes=atomic-expand -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK32 ; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and ; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this @@ -94,98 +95,262 @@ define void @float_store_expand_addr1(ptr addrspace(1) %ptr, float %v) { } define void @pointer_cmpxchg_expand(ptr %ptr, ptr %v) { -; CHECK-LABEL: define void @pointer_cmpxchg_expand( -; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst monotonic, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 -; CHECK-NEXT: ret void +; CHECK64-LABEL: define void @pointer_cmpxchg_expand( +; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK64-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst monotonic, align 8 +; CHECK64-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK64-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK64-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK64-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @pointer_cmpxchg_expand( +; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK32-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32 +; CHECK32-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i32 0, i32 [[TMP1]] seq_cst monotonic, align 4 +; CHECK32-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +; CHECK32-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +; CHECK32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +; CHECK32-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK32-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK32-NEXT: ret void ; cmpxchg ptr %ptr, ptr null, ptr %v seq_cst monotonic ret void } define void @pointer_cmpxchg_expand2(ptr %ptr, ptr %v) { -; CHECK-LABEL: define void @pointer_cmpxchg_expand2( -; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] release monotonic, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 -; CHECK-NEXT: ret void +; CHECK64-LABEL: define void @pointer_cmpxchg_expand2( +; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK64-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] release monotonic, align 8 +; CHECK64-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK64-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK64-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK64-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @pointer_cmpxchg_expand2( +; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK32-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32 +; CHECK32-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i32 0, i32 [[TMP1]] release monotonic, align 4 +; CHECK32-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +; CHECK32-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +; CHECK32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +; CHECK32-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK32-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK32-NEXT: ret void ; cmpxchg ptr %ptr, ptr null, ptr %v release monotonic ret void } define void @pointer_cmpxchg_expand3(ptr %ptr, ptr %v) { -; CHECK-LABEL: define void @pointer_cmpxchg_expand3( -; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 -; CHECK-NEXT: ret void +; CHECK64-LABEL: define void @pointer_cmpxchg_expand3( +; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK64-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 +; CHECK64-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK64-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK64-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK64-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @pointer_cmpxchg_expand3( +; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK32-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32 +; CHECK32-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i32 0, i32 [[TMP1]] seq_cst seq_cst, align 4 +; CHECK32-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +; CHECK32-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +; CHECK32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +; CHECK32-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK32-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK32-NEXT: ret void ; cmpxchg ptr %ptr, ptr null, ptr %v seq_cst seq_cst ret void } define void @pointer_cmpxchg_expand4(ptr %ptr, ptr %v) { -; CHECK-LABEL: define void @pointer_cmpxchg_expand4( -; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 -; CHECK-NEXT: ret void +; CHECK64-LABEL: define void @pointer_cmpxchg_expand4( +; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK64-NEXT: [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 +; CHECK64-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK64-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK64-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK64-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @pointer_cmpxchg_expand4( +; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK32-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32 +; CHECK32-NEXT: [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i32 0, i32 [[TMP1]] seq_cst seq_cst, align 4 +; CHECK32-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +; CHECK32-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +; CHECK32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +; CHECK32-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK32-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK32-NEXT: ret void ; cmpxchg weak ptr %ptr, ptr null, ptr %v seq_cst seq_cst ret void } define void @pointer_cmpxchg_expand5(ptr %ptr, ptr %v) { -; CHECK-LABEL: define void @pointer_cmpxchg_expand5( -; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 -; CHECK-NEXT: ret void +; CHECK64-LABEL: define void @pointer_cmpxchg_expand5( +; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64 +; CHECK64-NEXT: [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 +; CHECK64-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK64-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK64-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK64-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @pointer_cmpxchg_expand5( +; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) { +; CHECK32-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32 +; CHECK32-NEXT: [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i32 0, i32 [[TMP1]] seq_cst seq_cst, align 4 +; CHECK32-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +; CHECK32-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +; CHECK32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +; CHECK32-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0 +; CHECK32-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK32-NEXT: ret void ; cmpxchg volatile ptr %ptr, ptr null, ptr %v seq_cst seq_cst ret void } define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr, ptr addrspace(2) %v) { -; CHECK-LABEL: define void @pointer_cmpxchg_expand6( -; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2) -; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) [[TMP5]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr addrspace(2), i1 } [[TMP6]], i1 [[TMP4]], 1 -; CHECK-NEXT: ret void +; CHECK64-LABEL: define void @pointer_cmpxchg_expand6( +; CHECK64-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) { +; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i64 +; CHECK64-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8 +; CHECK64-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK64-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2) +; CHECK64-NEXT: [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) [[TMP5]], 0 +; CHECK64-NEXT: [[TMP7:%.*]] = insertvalue { ptr addrspace(2), i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @pointer_cmpxchg_expand6( +; CHECK32-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) { +; CHECK32-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i32 +; CHECK32-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 0, i32 [[TMP1]] seq_cst seq_cst, align 4 +; CHECK32-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +; CHECK32-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +; CHECK32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(2) +; CHECK32-NEXT: [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) [[TMP5]], 0 +; CHECK32-NEXT: [[TMP7:%.*]] = insertvalue { ptr addrspace(2), i1 } [[TMP6]], i1 [[TMP4]], 1 +; CHECK32-NEXT: ret void ; cmpxchg ptr addrspace(1) %ptr, ptr addrspace(2) null, ptr addrspace(2) %v seq_cst seq_cst ret void } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK64-LABEL: define <2 x ptr> @atomic_vec2_ptr_align( +; CHECK64-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK64-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2) +; CHECK64-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64> +; CHECK64-NEXT: [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr> +; CHECK64-NEXT: ret <2 x ptr> [[TMP3]] +; +; CHECK32-LABEL: define <2 x ptr> @atomic_vec2_ptr_align( +; CHECK32-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK32-NEXT: [[RET:%.*]] = load atomic <2 x ptr>, ptr [[X]] acquire, align 16 +; CHECK32-NEXT: ret <2 x ptr> [[RET]] +; + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + +define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind { +; CHECK64-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align( +; CHECK64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK64-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2) +; CHECK64-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32> +; CHECK64-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)> +; CHECK64-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]] +; +; CHECK32-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align( +; CHECK32-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x ptr addrspace(270)>, align 16 +; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]]) +; CHECK32-NEXT: call void @__atomic_load(i32 16, ptr [[X]], ptr [[TMP1]], i32 2) +; CHECK32-NEXT: [[TMP2:%.*]] = load <4 x ptr addrspace(270)>, ptr [[TMP1]], align 16 +; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]]) +; CHECK32-NEXT: ret <4 x ptr addrspace(270)> [[TMP2]] +; + %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16 + ret <4 x ptr addrspace(270)> %ret +} + +define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind { +; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8 +; CHECK-NEXT: ret <2 x i16> [[RET]] +; + %ret = load atomic <2 x i16>, ptr %x acquire, align 8 + ret <2 x i16> %ret +} + +define <2 x half> @atomic_vec2_half(ptr %x) nounwind { +; CHECK-LABEL: define <2 x half> @atomic_vec2_half( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8 +; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half> +; CHECK-NEXT: ret <2 x half> [[RET]] +; + %ret = load atomic <2 x half>, ptr %x acquire, align 8 + ret <2 x half> %ret +} + +define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind { +; CHECK64-LABEL: define <4 x i32> @atomic_vec4_i32( +; CHECK64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK64-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2) +; CHECK64-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32> +; CHECK64-NEXT: ret <4 x i32> [[TMP2]] +; +; CHECK32-LABEL: define <4 x i32> @atomic_vec4_i32( +; CHECK32-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x i32>, align 16 +; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]]) +; CHECK32-NEXT: call void @__atomic_load(i32 16, ptr [[X]], ptr [[TMP1]], i32 2) +; CHECK32-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 16 +; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]]) +; CHECK32-NEXT: ret <4 x i32> [[TMP2]] +; + %ret = load atomic <4 x i32>, ptr %x acquire, align 16 + ret <4 x i32> %ret +} + +define <4 x float> @atomic_vec4_float(ptr %x) nounwind { +; CHECK64-LABEL: define <4 x float> @atomic_vec4_float( +; CHECK64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK64-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2) +; CHECK64-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float> +; CHECK64-NEXT: ret <4 x float> [[TMP2]] +; +; CHECK32-LABEL: define <4 x float> @atomic_vec4_float( +; CHECK32-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x float>, align 16 +; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]]) +; CHECK32-NEXT: call void @__atomic_load(i32 16, ptr [[X]], ptr [[TMP1]], i32 2) +; CHECK32-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP1]], align 16 +; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]]) +; CHECK32-NEXT: ret <4 x float> [[TMP2]] +; + %ret = load atomic <4 x float>, ptr %x acquire, align 16 + ret <4 x float> %ret +} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
