[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores
This revision was automatically updated to reflect the committed changes. Closed by commit rG3f5bf35f868d: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores (authored by kmclaughlin). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71000/new/ https://reviews.llvm.org/D71000 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -0,0 +1,95 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; STNT1B +; + +define void @stnt1b_i8( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1b_i8: +; CHECK: stnt1b { z0.b }, p0, [x0, #0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv16i8( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1H +; + +define void @stnt1h_i16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_i16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8i16( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1h_f16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_f16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8f16( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1W +; + +define void @stnt1w_i32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_i32: +; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv4i32( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1w_f32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_f32: +; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2] +; 
CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv4f32( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1D +; + +define void @stnt1d_i64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_i64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2i64( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1d_f64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_f64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2f64( %data, + %pred, +* %addr) + ret void +} + +declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LDNT1B +; + +define @ldnt1b_i8( %pred, * %addr) { +; CHECK-LABEL: ldnt1b_i8: +; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv16i8( %pred, + * %addr) + ret %res +} + +; +; LDNT1H +; + +define @ldnt1h_i16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_i16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8i16( %pred, + * %addr) + ret %res +} + +define @ldnt1h_f16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_f16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8f16( %pred, + * %addr) + ret %res +} + +; +; LDNT1W +; + +define @ldnt1w_i32( %pred, * %addr) { +; CHECK-LABEL: ldnt1w_i32: +; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2] +; 
CHECK-NEXT: r
[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores
kmclaughlin updated this revision to Diff 233298. kmclaughlin marked an inline comment as done. kmclaughlin added a comment. - Changed 'Offset' value used by getMaskedLoad & getMaskedStore to scalar type CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71000/new/ https://reviews.llvm.org/D71000 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -0,0 +1,95 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; STNT1B +; + +define void @stnt1b_i8( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1b_i8: +; CHECK: stnt1b { z0.b }, p0, [x0, #0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv16i8( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1H +; + +define void @stnt1h_i16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_i16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8i16( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1h_f16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_f16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8f16( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1W +; + +define void @stnt1w_i32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_i32: +; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv4i32( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1w_f32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_f32: +; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2] +; CHECK-NEXT: ret + call void 
@llvm.aarch64.sve.stnt1.nxv4f32( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1D +; + +define void @stnt1d_i64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_i64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2i64( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1d_f64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_f64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2f64( %data, + %pred, +* %addr) + ret void +} + +declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LDNT1B +; + +define @ldnt1b_i8( %pred, * %addr) { +; CHECK-LABEL: ldnt1b_i8: +; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv16i8( %pred, + * %addr) + ret %res +} + +; +; LDNT1H +; + +define @ldnt1h_i16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_i16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8i16( %pred, + * %addr) + ret %res +} + +define @ldnt1h_f16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_f16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8f16( %pred, + * %addr) + ret %res +} + +; +; LDNT1W +; + +define @ldnt1w_i32( %pred, * %addr) { +; CHECK-LABEL: ldnt1w_i32: +; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2] +; CHECK-NEXT: ret + %res = call 
@llvm.aarch64.sve
[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores
efriedma accepted this revision. efriedma added a comment. This revision is now accepted and ready to land. LGTM Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:10800 +MINode->getOperand(3), DAG.getUNDEF(LoadVT), +MINode->getOperand(2), PassThru, +MINode->getMemoryVT(), MINode->getMemOperand(), Offset (the fifth argument) is supposed to be a scalar, not a vector. Probably nothing actually checks that for loads that aren't pre/post-indexed. (Same applies to MSTORE.) CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71000/new/ https://reviews.llvm.org/D71000 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores
kmclaughlin marked 2 inline comments as done. kmclaughlin added inline comments. Comment at: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:6587 + else if (!Size) +Size = MemVT.getStoreSize().getKnownMinSize(); efriedma wrote: > In order for alias analysis to correctly handle a MachineMemOperand, the > "Size" of an operation has to be conservative, in the sense that the > number of bytes accessed must be at most "Size". Otherwise we'll assume two > operations don't alias when they actually do. > > For a scaled vector, we don't know the size, so we have to conservatively > pass "MemoryLocation::UnknownSize". Thanks @efriedma, I have changed this to use MemoryLocation::UnknownSize CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71000/new/ https://reviews.llvm.org/D71000 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores
kmclaughlin updated this revision to Diff 232870. kmclaughlin added a comment. - Set 'Size' to MemoryLocation::UnknownSize for scalable vectors in getMemIntrinsicNode - Ensure MLOAD zeroes inactive lanes by using a zero value for the PassThru in getMaskedLoad CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71000/new/ https://reviews.llvm.org/D71000 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -0,0 +1,95 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; STNT1B +; + +define void @stnt1b_i8( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1b_i8: +; CHECK: stnt1b { z0.b }, p0, [x0, #0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv16i8( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1H +; + +define void @stnt1h_i16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_i16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8i16( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1h_f16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_f16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8f16( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1W +; + +define void @stnt1w_i32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_i32: +; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv4i32( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1w_f32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_f32: +; CHECK: stnt1w { z0.s }, p0, 
[x0, #0, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv4f32( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1D +; + +define void @stnt1d_i64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_i64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2i64( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1d_f64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_f64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2f64( %data, + %pred, +* %addr) + ret void +} + +declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LDNT1B +; + +define @ldnt1b_i8( %pred, * %addr) { +; CHECK-LABEL: ldnt1b_i8: +; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv16i8( %pred, + * %addr) + ret %res +} + +; +; LDNT1H +; + +define @ldnt1h_i16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_i16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8i16( %pred, + * %addr) + ret %res +} + +define @ldnt1h_f16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_f16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8f16( %pred, + * %addr) + ret %res +} + +; +; LDNT1W +; + +define @ldnt1w_i32( %pred, * %addr) { +; CHECK-LABEL: ldnt1w_i32: +; CHECK: ldnt1w { z0.s }, p0/z, [x0, 
#0, ls
[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores
efriedma added inline comments. Comment at: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:6587 + else if (!Size) +Size = MemVT.getStoreSize().getKnownMinSize(); In order for alias analysis to correctly handle a MachineMemOperand, the "Size" of an operation has to be conservative, in the sense that the number of bytes accessed must be at most "Size". Otherwise we'll assume two operations don't alias when they actually do. For a scaled vector, we don't know the size, so we have to conservatively pass "MemoryLocation::UnknownSize". CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71000/new/ https://reviews.llvm.org/D71000 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores
kmclaughlin updated this revision to Diff 232072. kmclaughlin edited the summary of this revision. kmclaughlin added a comment. - Removed AArch64 specific ISDNodes for MLOAD & MSTORE CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71000/new/ https://reviews.llvm.org/D71000 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -0,0 +1,95 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; STNT1B +; + +define void @stnt1b_i8( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1b_i8: +; CHECK: stnt1b { z0.b }, p0, [x0, #0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv16i8( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1H +; + +define void @stnt1h_i16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_i16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8i16( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1h_f16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_f16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8f16( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1W +; + +define void @stnt1w_i32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_i32: +; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv4i32( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1w_f32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_f32: +; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2] +; CHECK-NEXT: ret + call void 
@llvm.aarch64.sve.stnt1.nxv4f32( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1D +; + +define void @stnt1d_i64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_i64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2i64( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1d_f64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_f64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2f64( %data, + %pred, +* %addr) + ret void +} + +declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LDNT1B +; + +define @ldnt1b_i8( %pred, * %addr) { +; CHECK-LABEL: ldnt1b_i8: +; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv16i8( %pred, + * %addr) + ret %res +} + +; +; LDNT1H +; + +define @ldnt1h_i16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_i16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8i16( %pred, + * %addr) + ret %res +} + +define @ldnt1h_f16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_f16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8f16( %pred, + * %addr) + ret %res +} + +; +; LDNT1W +; + +define @ldnt1w_i32( %pred, * %addr) { +; CHECK-LABEL: ldnt1w_i32: +; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2] +; CHECK-NEXT: ret + %res = call 
@llvm.aarch64.sve.ldnt1.nxv4i32( %pred
[PATCH] D71000: [AArch64][SVE] Implement intrinsics for non-temporal loads & stores
kmclaughlin created this revision. kmclaughlin added reviewers: sdesmalen, paulwalker-arm, dancgr, mgudim, efriedma. Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett. Herald added a reviewer: rengolin. Herald added a project: LLVM. Adds the following intrinsics: - llvm.aarch64.sve.ldnt1 - llvm.aarch64.sve.stnt1 This patch also adds the MLOAD & MSTORE AArch64ISD nodes, setting the MONonTemporal flag when used with the intrinsics above. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D71000 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -0,0 +1,95 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; STNT1B +; + +define void @stnt1b_i8( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1b_i8: +; CHECK: stnt1b { z0.b }, p0, [x0, #0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv16i8( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1H +; + +define void @stnt1h_i16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_i16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8i16( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1h_f16( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1h_f16: +; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv8f16( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1W +; + +define void @stnt1w_i32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_i32: +; CHECK: stnt1w { z0.s }, p0, [x0, 
#0, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv4i32( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1w_f32( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1w_f32: +; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv4f32( %data, + %pred, +* %addr) + ret void +} + +; +; STNT1D +; + +define void @stnt1d_i64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_i64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2i64( %data, + %pred, +* %addr) + ret void +} + +define void @stnt1d_f64( %data, %pred, * %addr) { +; CHECK-LABEL: stnt1d_f64: +; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.nxv2f64( %data, + %pred, +* %addr) + ret void +} + +declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2i64(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv8f16(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , *) +declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , *) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LDNT1B +; + +define @ldnt1b_i8( %pred, * %addr) { +; CHECK-LABEL: ldnt1b_i8: +; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv16i8( %pred, + * %addr) + ret %res +} + +; +; LDNT1H +; + +define @ldnt1h_i16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_i16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8i16( %pred, + * %addr) + ret %res +} + +define @ldnt1h_f16( %pred, * %addr) { +; CHECK-LABEL: ldnt1h_f16: +; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl 
#1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ldnt1.nxv8f16( %