[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
This revision was automatically updated to reflect the committed changes. Closed by commit rGcdcc4f2a44b5: [AArch64][SVE] Add intrinsic for non-faulting loads (authored by kmclaughlin). Changed prior to commit: https://reviews.llvm.org/D71698?vs=239144&id=239531#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/lib/Target/AArch64/SVEInstrFormats.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -0,0 +1,182 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define @ldnf1b( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b: +; CHECK: ldnf1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a) + ret %load +} + +define @ldnf1b_h( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_h: +; CHECK: ldnf1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_h( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_h: +; CHECK: ldnf1sb { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a) + ret %load +} + +define @ldnf1h_f16( %pg, half* %a) { +; CHECK-LABEL: ldnf1h_f16: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a) + ret %load +} + +define @ldnf1b_s( %pg, 
i8* %a) { +; CHECK-LABEL: ldnf1b_s: +; CHECK: ldnf1b { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_s( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_s: +; CHECK: ldnf1sb { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h_s( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_s: +; CHECK: ldnf1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sh_s( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_s: +; CHECK: ldnf1sh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) + %res = sext %load to + ret %res +} + +define @ldnf1w( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a) + ret %load +} + +define @ldnf1w_f32( %pg, float* %a) { +; CHECK-LABEL: ldnf1w_f32: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a) + ret %load +} + +define @ldnf1b_d( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_d: +; CHECK: ldnf1b { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_d( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_d: +; CHECK: ldnf1sb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h_d( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_d: +; CHECK: ldnf1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sh_d( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_d: +; CHECK: ldnf1sh { z0.d }, 
p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) + %res = sext %load to + ret %res +} + +define @ldnf1w_d( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w_d: +; CHECK: ldnf1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sw_d( %pg, i32* %a) { +; CHECK-LABEL: ldnf1sw_d: +; CHECK: ldnf1sw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) + %res = sext %load to + ret %res +} + +define @ldnf1d( %pg, i64* %a) { +; CHECK-LABEL: ldnf1d: +; CHECK: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a) + ret %load +} + +define @ldnf1d_f64(
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
sdesmalen accepted this revision. sdesmalen added a comment. This revision is now accepted and ready to land. LGTM [with the caveat that we need to revisit the modelling of the `FFR` register and get rid of the `PseudoInstExpansion` at a later point, as discussed during the previous sync-up call] CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
kmclaughlin updated this revision to Diff 239144. kmclaughlin added a comment. - Some minor changes to performSignExtendInRegCombine to address comments from @sdesmalen CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/lib/Target/AArch64/SVEInstrFormats.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -0,0 +1,182 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define @ldnf1b( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b: +; CHECK: ldnf1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a) + ret %load +} + +define @ldnf1b_h( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_h: +; CHECK: ldnf1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_h( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_h: +; CHECK: ldnf1sb { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a) + ret %load +} + +define @ldnf1h_f16( %pg, half* %a) { +; CHECK-LABEL: ldnf1h_f16: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a) + ret %load +} + +define @ldnf1b_s( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_s: +; CHECK: ldnf1b { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call 
@llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_s( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_s: +; CHECK: ldnf1sb { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h_s( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_s: +; CHECK: ldnf1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sh_s( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_s: +; CHECK: ldnf1sh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) + %res = sext %load to + ret %res +} + +define @ldnf1w( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a) + ret %load +} + +define @ldnf1w_f32( %pg, float* %a) { +; CHECK-LABEL: ldnf1w_f32: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a) + ret %load +} + +define @ldnf1b_d( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_d: +; CHECK: ldnf1b { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_d( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_d: +; CHECK: ldnf1sb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h_d( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_d: +; CHECK: ldnf1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sh_d( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_d: +; CHECK: ldnf1sh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) + %res = sext %load 
to + ret %res +} + +define @ldnf1w_d( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w_d: +; CHECK: ldnf1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sw_d( %pg, i32* %a) { +; CHECK-LABEL: ldnf1sw_d: +; CHECK: ldnf1sw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) + %res = sext %load to + ret %res +} + +define @ldnf1d( %pg, i64* %a) { +; CHECK-LABEL: ldnf1d: +; CHECK: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a) + ret %load +} + +define @ldnf1d_f64( %pg, double* %a) { +; CHECK-LABEL: ldnf1d_f64: +; CHECK: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
sdesmalen added inline comments. Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12460 - if ((SignExtSrcVT != GLD1SrcMemVT) || !Src.hasOneUse()) + unsigned OpNum = NewOpc == AArch64ISD::LDNF1S ? 3 : 4; + EVT LD1SrcMemVT = cast(Src->getOperand(OpNum))->getVT(); Move the assignment of `MemVTOpNum` to the switch statement above instead of special-casing it here? Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12461 + unsigned OpNum = NewOpc == AArch64ISD::LDNF1S ? 3 : 4; + EVT LD1SrcMemVT = cast(Src->getOperand(OpNum))->getVT(); + nit: `s/LD1SrcMemVT/SrcMemVT/` Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12469 + + SmallVector Ops = {Src->getOperand(0), Src->getOperand(1), + Src->getOperand(2), Src->getOperand(3)}; Better make the default '5' if there is a large likelihood of there being 5 default values. Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12469 + + SmallVector Ops = {Src->getOperand(0), Src->getOperand(1), + Src->getOperand(2), Src->getOperand(3)}; sdesmalen wrote: > Better make the default '5' if there is a large likelihood of there being 5 > default values. Instead of special-casing LDNF1S below, you can write this as: SmallVector Ops; for (unsigned I = 0; I < Src->getNumOperands(); ++I) Ops.push_back(Src->getOperand(I)); CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
kmclaughlin marked 5 inline comments as done. kmclaughlin added a comment. Thanks for your suggestions, @andwar! CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
kmclaughlin updated this revision to Diff 237906. kmclaughlin added a comment. - Rebased patch - Updated comments and extended getSVEContainerType to handle nxv8i16 & nxv16i8 CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/lib/Target/AArch64/SVEInstrFormats.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -0,0 +1,182 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define @ldnf1b( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b: +; CHECK: ldnf1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a) + ret %load +} + +define @ldnf1b_h( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_h: +; CHECK: ldnf1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_h( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_h: +; CHECK: ldnf1sb { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a) + ret %load +} + +define @ldnf1h_f16( %pg, half* %a) { +; CHECK-LABEL: ldnf1h_f16: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %a) + ret %load +} + +define @ldnf1b_s( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_s: +; CHECK: ldnf1b { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call 
@llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_s( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_s: +; CHECK: ldnf1sb { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h_s( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_s: +; CHECK: ldnf1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sh_s( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_s: +; CHECK: ldnf1sh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) + %res = sext %load to + ret %res +} + +define @ldnf1w( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a) + ret %load +} + +define @ldnf1w_f32( %pg, float* %a) { +; CHECK-LABEL: ldnf1w_f32: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a) + ret %load +} + +define @ldnf1b_d( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_d: +; CHECK: ldnf1b { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_d( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_d: +; CHECK: ldnf1sb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h_d( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_d: +; CHECK: ldnf1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sh_d( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_d: +; CHECK: ldnf1sh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) + %res = sext %load 
to + ret %res +} + +define @ldnf1w_d( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w_d: +; CHECK: ldnf1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sw_d( %pg, i32* %a) { +; CHECK-LABEL: ldnf1sw_d: +; CHECK: ldnf1sw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) + %res = sext %load to + ret %res +} + +define @ldnf1d( %pg, i64* %a) { +; CHECK-LABEL: ldnf1d: +; CHECK: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a) + ret %load +} + +define @ldnf1d_f64( %pg, double* %a) { +; CHECK-LABEL: ldnf1d_f64: +; CHECK: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch6
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
efriedma added inline comments. Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333 + // We need a layer of indirection because early machine code passes balk at + // physical register (i.e. FFR) uses that have no previous definition. + let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in { sdesmalen wrote: > efriedma wrote: > > kmclaughlin wrote: > > > efriedma wrote: > > > > This is depending on hasSideEffects to preserve the correct ordering > > > > with instructions that read/write FFR? That probably works. I guess > > > > the alternative is to insert an IMPLICIT_DEF of FFR in the entry block > > > > of each function. > > > > > > > > What are the calling convention rules for FFR? Is it callee-save? If > > > > not, we might need to do some work to make FFR reads/writes do > > > > something sane across calls inserted by the compiler. > > > The FFR is not callee-saved. We will need to add support to save & > > > restore it where appropriate at the point the compiler starts generating > > > reads to the FFR, but for the purpose of the ACLE the user will be > > > required to do this if necessary. > > How can the user write correct code to save/restore the FFR? The compiler > > can move arbitrary readnone/argmemonly calls between the definition and the > > use. > There are separate intrinsics for loading/writing the FFR (svrdffr, svsetffr, > svwrffr), which use a `svbool_t` to keep the value of the FFR. These > intrinsics are implemented in the same way with a Pseudo with `hasSideEffects > = 1` set. > > I thought this flag would prevent other calls from being scheduled/moved over > these intrinsics, as they have unknown/unmodelled side-effects and would thus > act kind of like a barrier? > The issue would be transforms at the IR/SelectionDAG level. We can probably model calls at the MIR level correctly, like you're describing. 
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
sdesmalen added inline comments. Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333 + // We need a layer of indirection because early machine code passes balk at + // physical register (i.e. FFR) uses that have no previous definition. + let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in { efriedma wrote: > kmclaughlin wrote: > > efriedma wrote: > > > This is depending on hasSideEffects to preserve the correct ordering with > > > instructions that read/write FFR? That probably works. I guess the > > > alternative is to insert an IMPLICIT_DEF of FFR in the entry block of > > > each function. > > > > > > What are the calling convention rules for FFR? Is it callee-save? If > > > not, we might need to do some work to make FFR reads/writes do something > > > sane across calls inserted by the compiler. > > The FFR is not callee-saved. We will need to add support to save & restore > > it where appropriate at the point the compiler starts generating reads to > > the FFR, but for the purpose of the ACLE the user will be required to do > > this if necessary. > How can the user write correct code to save/restore the FFR? The compiler > can move arbitrary readnone/argmemonly calls between the definition and the > use. There are separate intrinsics for loading/writing the FFR (svrdffr, svsetffr, svwrffr), which use a `svbool_t` to keep the value of the FFR. These intrinsics are implemented in the same way with a Pseudo with `hasSideEffects = 1` set. I thought this flag would prevent other calls from being scheduled/moved over these intrinsics, as they have unknown/unmodelled side-effects and would thus act kind of like a barrier? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
andwar added inline comments. Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:9998 + // GLD1* instructions perform an implicit zero-extend, which makes them // perfect candidates for combining. Could you replace `GLD1*` with `Load`? I believe that that will be still correct with the added bonus of covering the new case :) Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:11051 + if (ContainerVT.isInteger()) {
switch (VT.getVectorNumElements()) {
default: return SDValue(); You could use `getSVEContainerType` here instead. You'll need to extend it a wee bit. Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12284 // Gather load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes. The following `switch` statement will now cover more than just *Gather* nodes. Maybe `SVE load nodes` instead? Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12328-12331 + Ops.push_back(Src->getOperand(0)); + Ops.push_back(Src->getOperand(1)); + Ops.push_back(Src->getOperand(2)); + Ops.push_back(Src->getOperand(3)); Why not: ``` SmallVector Ops = {Src->getOperand(0), Src->getOperand(1), Src->getOperand(2), Src->getOperand(3), Src->getOperand(4)}; ``` ? Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:12332 + Ops.push_back(Src->getOperand(3)); + if (NewOpc != AArch64ISD::LDNF1S) +Ops.push_back(Src->getOperand(4)); Could you add a comment explaining what the underlying difference between `LDNF1S` and `GLD1S` is? Otherwise it's not clear why this `if` statement is needed. IIUC, `GLD1S` has an extra argument for the offsets (hence 5 args vs 4). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
efriedma added inline comments. Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333 + // We need a layer of indirection because early machine code passes balk at + // physical register (i.e. FFR) uses that have no previous definition. + let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in { kmclaughlin wrote: > efriedma wrote: > > This is depending on hasSideEffects to preserve the correct ordering with > > instructions that read/write FFR? That probably works. I guess the > > alternative is to insert an IMPLICIT_DEF of FFR in the entry block of each > > function. > > > > What are the calling convention rules for FFR? Is it callee-save? If not, > > we might need to do some work to make FFR reads/writes do something sane > > across calls inserted by the compiler. > The FFR is not callee-saved. We will need to add support to save & restore it > where appropriate at the point the compiler starts generating reads to the > FFR, but for the purpose of the ACLE the user will be required to do this if > necessary. How can the user write correct code to save/restore the FFR? The compiler can move arbitrary readnone/argmemonly calls between the definition and the use. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
kmclaughlin added inline comments. Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333 + // We need a layer of indirection because early machine code passes balk at + // physical register (i.e. FFR) uses that have no previous definition. + let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in { efriedma wrote: > This is depending on hasSideEffects to preserve the correct ordering with > instructions that read/write FFR? That probably works. I guess the > alternative is to insert an IMPLICIT_DEF of FFR in the entry block of each > function. > > What are the calling convention rules for FFR? Is it callee-save? If not, > we might need to do some work to make FFR reads/writes do something sane > across calls inserted by the compiler. The FFR is not callee-saved. We will need to add support to save & restore it where appropriate at the point the compiler starts generating reads to the FFR, but for the purpose of the ACLE the user will be required to do this if necessary. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
efriedma added inline comments. Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:5333 + // We need a layer of indirection because early machine code passes balk at + // physical register (i.e. FFR) uses that have no previous definition. + let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in { This is depending on hasSideEffects to preserve the correct ordering with instructions that read/write FFR? That probably works. I guess the alternative is to insert an IMPLICIT_DEF of FFR in the entry block of each function. What are the calling convention rules for FFR? Is it callee-save? If not, we might need to do some work to make FFR reads/writes do something sane across calls inserted by the compiler. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71698/new/ https://reviews.llvm.org/D71698 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71698: [AArch64][SVE] Add intrinsic for non-faulting loads
kmclaughlin created this revision. kmclaughlin added reviewers: sdesmalen, efriedma, andwar, dancgr, mgudim. Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett. Herald added a reviewer: rengolin. Herald added a project: LLVM. This patch adds the llvm.aarch64.sve.ldnf1 intrinsic, plus DAG combine rules for non-faulting loads and sign/zero extends Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D71698 Files: llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/lib/Target/AArch64/SVEInstrFormats.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -0,0 +1,182 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define @ldnf1b( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b: +; CHECK: ldnf1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %a) + ret %load +} + +define @ldnf1b_h( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_h: +; CHECK: ldnf1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_h( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_h: +; CHECK: ldnf1sb { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %a) + ret %load +} + +define @ldnf1h_f16( %pg, half* %a) { +; CHECK-LABEL: ldnf1h_f16: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* 
%a) + ret %load +} + +define @ldnf1b_s( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_s: +; CHECK: ldnf1b { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_s( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_s: +; CHECK: ldnf1sb { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h_s( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_s: +; CHECK: ldnf1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sh_s( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_s: +; CHECK: ldnf1sh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %a) + %res = sext %load to + ret %res +} + +define @ldnf1w( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %a) + ret %load +} + +define @ldnf1w_f32( %pg, float* %a) { +; CHECK-LABEL: ldnf1w_f32: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %a) + ret %load +} + +define @ldnf1b_d( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_d: +; CHECK: ldnf1b { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sb_d( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_d: +; CHECK: ldnf1sb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %a) + %res = sext %load to + ret %res +} + +define @ldnf1h_d( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_d: +; CHECK: ldnf1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sh_d( %pg, i16* %a) { +; 
CHECK-LABEL: ldnf1sh_d: +; CHECK: ldnf1sh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %a) + %res = sext %load to + ret %res +} + +define @ldnf1w_d( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w_d: +; CHECK: ldnf1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) + %res = zext %load to + ret %res +} + +define @ldnf1sw_d( %pg, i32* %a) { +; CHECK-LABEL: ldnf1sw_d: +; CHECK: ldnf1sw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %a) + %res = sext %load to + ret %res +} + +define @ldnf1d( %pg, i64* %a) { +; CHECK-LABEL: ldnf1d: +; CHECK: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %a) +