kmclaughlin created this revision. kmclaughlin added reviewers: sdesmalen, paulwalker-arm, efriedma, dancgr, mgudim. Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett. Herald added a reviewer: rengolin. Herald added a project: LLVM.
Adds the llvm.aarch64.sve.ldnf1 intrinsic, adding a new flag to MachineMemOperand (MONonFaulting). Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D71556 Files: llvm/include/llvm/CodeGen/MachineMemOperand.h llvm/include/llvm/CodeGen/SelectionDAGNodes.h llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/CodeGen/MachineOperand.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/lib/Target/AArch64/SVEInstrFormats.td llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -0,0 +1,182 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define <vscale x 16 x i8> @ldnf1b(<vscale x 16 x i1> %pg, i8* %a) { +; CHECK-LABEL: ldnf1b: +; CHECK: ldnf1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %a) + ret <vscale x 16 x i8> %load +} + +define <vscale x 8 x i16> @ldnf1b_h(<vscale x 8 x i1> %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_h: +; CHECK: ldnf1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a) + %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16> + ret <vscale x 8 x i16> %res +} + +define <vscale x 8 x i16> @ldnf1sb_h(<vscale x 8 x i1> %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_h: +; CHECK: ldnf1sb { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a) + %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16> + ret <vscale x 8 x i16> %res +} + +define <vscale x 8 x i16> @ldnf1h(<vscale x 8 x i1> %pg, i16* %a) { +; CHECK-LABEL: ldnf1h: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, i16* %a) + ret <vscale x 8 x i16> %load +} + +define <vscale x 8 x half> @ldnf1h_f16(<vscale x 8 x i1> %pg, half* %a) { +; CHECK-LABEL: ldnf1h_f16: +; CHECK: ldnf1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, half* %a) + ret <vscale x 8 x half> %load +} + +define <vscale x 4 x i32> @ldnf1b_s(<vscale x 4 x i1> %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_s: +; CHECK: ldnf1b { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + 
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, i8* %a) + %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32> + ret <vscale x 4 x i32> %res +} + +define <vscale x 4 x i32> @ldnf1sb_s(<vscale x 4 x i1> %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_s: +; CHECK: ldnf1sb { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, i8* %a) + %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32> + ret <vscale x 4 x i32> %res +} + +define <vscale x 4 x i32> @ldnf1h_s(<vscale x 4 x i1> %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_s: +; CHECK: ldnf1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, i16* %a) + %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32> + ret <vscale x 4 x i32> %res +} + +define <vscale x 4 x i32> @ldnf1sh_s(<vscale x 4 x i1> %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_s: +; CHECK: ldnf1sh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, i16* %a) + %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32> + ret <vscale x 4 x i32> %res +} + +define <vscale x 4 x i32> @ldnf1w(<vscale x 4 x i1> %pg, i32* %a) { +; CHECK-LABEL: ldnf1w: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, i32* %a) + ret <vscale x 4 x i32> %load +} + +define <vscale x 4 x float> @ldnf1w_f32(<vscale x 4 x i1> %pg, float* %a) { +; CHECK-LABEL: ldnf1w_f32: +; CHECK: ldnf1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, float* %a) + ret <vscale x 4 x float> %load +} + +define <vscale x 2 x i64> @ldnf1b_d(<vscale x 2 x i1> %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_d: +; CHECK: ldnf1b { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 
x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, i8* %a) + %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64> + ret <vscale x 2 x i64> %res +} + +define <vscale x 2 x i64> @ldnf1sb_d(<vscale x 2 x i1> %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_d: +; CHECK: ldnf1sb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, i8* %a) + %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64> + ret <vscale x 2 x i64> %res +} + +define <vscale x 2 x i64> @ldnf1h_d(<vscale x 2 x i1> %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_d: +; CHECK: ldnf1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a) + %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64> + ret <vscale x 2 x i64> %res +} + +define <vscale x 2 x i64> @ldnf1sh_d(<vscale x 2 x i1> %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_d: +; CHECK: ldnf1sh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a) + %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64> + ret <vscale x 2 x i64> %res +} + +define <vscale x 2 x i64> @ldnf1w_d(<vscale x 2 x i1> %pg, i32* %a) { +; CHECK-LABEL: ldnf1w_d: +; CHECK: ldnf1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, i32* %a) + %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64> + ret <vscale x 2 x i64> %res +} + +define <vscale x 2 x i64> @ldnf1sw_d(<vscale x 2 x i1> %pg, i32* %a) { +; CHECK-LABEL: ldnf1sw_d: +; CHECK: ldnf1sw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, i32* %a) + %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64> + ret <vscale x 2 x i64> %res +} + +define <vscale x 2 x i64> @ldnf1d(<vscale x 2 x i1> %pg, i64* %a) { +; CHECK-LABEL: ldnf1d: 
+; CHECK: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, i64* %a) + ret <vscale x 2 x i64> %load +} + +define <vscale x 2 x double> @ldnf1d_f64(<vscale x 2 x i1> %pg, double* %a) { +; CHECK-LABEL: ldnf1d_f64: +; CHECK: ldnf1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, double* %a) + ret <vscale x 2 x double> %load +} + +declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1>, i8*) + +declare <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1>, i8*) +declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1>, i16*) +declare <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1>, half*) + +declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1>, i8*) +declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1>, i16*) +declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1>, i32*) +declare <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1>, float*) + +declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1>, i8*) +declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1>, i16*) +declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1>, i32*) +declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1>, i64*) +declare <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1>, double*) Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -5211,14 +5211,21 @@ multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def "" : sve_mem_cld_si_base<dtype, nf, 
asm, listty>; + def _REAL : sve_mem_cld_si_base<dtype, nf, asm, listty>; def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", - (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", - (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", - (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; + (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; + + // We need a layer of indirection because early machine code passes balk at + // physical register (i.e. FFR) uses that have no previous definition. + let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in { + def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), []>, + PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4)>; + } } multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty, Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1188,6 +1188,30 @@ defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRI>; defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRI>; defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRI>; + + // 2-element contiguous non-faulting loads + defm : pred_load<nxv2i64, nxv2i1, zext_non_faulting_load_i8, LDNF1B_D_IMM>; + defm : pred_load<nxv2i64, nxv2i1, zext_non_faulting_load_i16, LDNF1H_D_IMM>; + defm : pred_load<nxv2i64, nxv2i1, zext_non_faulting_load_i32, LDNF1W_D_IMM>; + defm : pred_load<nxv2i64, nxv2i1, 
sext_non_faulting_load_i8, LDNF1SB_D_IMM>; + defm : pred_load<nxv2i64, nxv2i1, sext_non_faulting_load_i16, LDNF1SH_D_IMM>; + defm : pred_load<nxv2i64, nxv2i1, sext_non_faulting_load_i32, LDNF1SW_D_IMM>; + defm : pred_load<nxv2i64, nxv2i1, non_faulting_load, LDNF1D_IMM>; + + // 4-element contiguous non-faulting loads + defm : pred_load<nxv4i32, nxv4i1, zext_non_faulting_load_i8, LDNF1B_S_IMM>; + defm : pred_load<nxv4i32, nxv4i1, zext_non_faulting_load_i16, LDNF1H_S_IMM>; + defm : pred_load<nxv4i32, nxv4i1, sext_non_faulting_load_i8, LDNF1SB_S_IMM>; + defm : pred_load<nxv4i32, nxv4i1, sext_non_faulting_load_i16, LDNF1SH_S_IMM>; + defm : pred_load<nxv4i32, nxv4i1, non_faulting_load, LDNF1W_IMM>; + + // 8-element contiguous non-faulting loads + defm : pred_load<nxv8i16, nxv8i1, zext_non_faulting_load_i8, LDNF1B_H_IMM>; + defm : pred_load<nxv8i16, nxv8i1, sext_non_faulting_load_i8, LDNF1SB_H_IMM>; + defm : pred_load<nxv8i16, nxv8i1, non_faulting_load, LDNF1H_IMM>; + + // 16-element contiguous non-faulting loads + defm : pred_load<nxv16i8, nxv16i1, non_faulting_load, LDNF1B_IMM>; } let Predicates = [HasSVE2] in { Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -266,7 +266,8 @@ (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD && cast<MaskedLoadSDNode>(N)->isUnindexed() && - !cast<MaskedLoadSDNode>(N)->isNonTemporal(); + !cast<MaskedLoadSDNode>(N)->isNonTemporal() && + !cast<MaskedLoadSDNode>(N)->isNonFaulting(); }]>; // sign extending masked load fragments. 
def asext_masked_load : @@ -274,7 +275,8 @@ (masked_ld node:$ptr, undef, node:$pred, node:$def),[{ return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD || cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) && - cast<MaskedLoadSDNode>(N)->isUnindexed(); + cast<MaskedLoadSDNode>(N)->isUnindexed() && + !cast<MaskedLoadSDNode>(N)->isNonFaulting(); }]>; def asext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -296,7 +298,8 @@ PatFrag<(ops node:$ptr, node:$pred, node:$def), (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD && - cast<MaskedLoadSDNode>(N)->isUnindexed(); + cast<MaskedLoadSDNode>(N)->isUnindexed() && + !cast<MaskedLoadSDNode>(N)->isNonFaulting(); }]>; def zext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -319,7 +322,71 @@ (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD && cast<MaskedLoadSDNode>(N)->isUnindexed() && - cast<MaskedLoadSDNode>(N)->isNonTemporal(); + cast<MaskedLoadSDNode>(N)->isNonTemporal() && + !cast<MaskedLoadSDNode>(N)->isNonFaulting(); +}]>; + +def non_faulting_load : + PatFrag<(ops node:$ptr, node:$pred, node:$def), + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD && + cast<MaskedLoadSDNode>(N)->isUnindexed() && + !cast<MaskedLoadSDNode>(N)->isNonTemporal() && + cast<MaskedLoadSDNode>(N)->isNonFaulting(); +}]>; + +def sext_non_faulting_load : + PatFrag<(ops node:$ptr, node:$pred, node:$def), + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD && + cast<MaskedLoadSDNode>(N)->isUnindexed() && + !cast<MaskedLoadSDNode>(N)->isNonTemporal() && + cast<MaskedLoadSDNode>(N)->isNonFaulting(); +}]>; + +def sext_non_faulting_load_i8 : + PatFrag<(ops node:$ptr, 
node:$pred, node:$def), + (sext_non_faulting_load node:$ptr, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def sext_non_faulting_load_i16 : + PatFrag<(ops node:$ptr, node:$pred, node:$def), + (sext_non_faulting_load node:$ptr, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def sext_non_faulting_load_i32 : + PatFrag<(ops node:$ptr, node:$pred, node:$def), + (sext_non_faulting_load node:$ptr, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +def zext_non_faulting_load : + PatFrag<(ops node:$ptr, node:$pred, node:$def), + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD && + cast<MaskedLoadSDNode>(N)->isUnindexed() && + !cast<MaskedLoadSDNode>(N)->isNonTemporal() && + cast<MaskedLoadSDNode>(N)->isNonFaulting(); +}]>; + +def zext_non_faulting_load_i8 : + PatFrag<(ops node:$ptr, node:$pred, node:$def), + (zext_non_faulting_load node:$ptr, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def zext_non_faulting_load_i16 : + PatFrag<(ops node:$ptr, node:$pred, node:$def), + (zext_non_faulting_load node:$ptr, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def zext_non_faulting_load_i32 : + PatFrag<(ops node:$ptr, node:$pred, node:$def), + (zext_non_faulting_load node:$ptr, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; // non-truncating masked store fragment. 
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8525,6 +8525,16 @@ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal; return true; } + case Intrinsic::aarch64_sve_ldnf1: { + PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType())); + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonFaulting; + return true; + } case Intrinsic::aarch64_sve_stnt1: { PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -12283,6 +12293,7 @@ case Intrinsic::aarch64_neon_st4lane: return performNEONPostLDSTCombine(N, DCI, DAG); case Intrinsic::aarch64_sve_ldnt1: + case Intrinsic::aarch64_sve_ldnf1: return performLDNT1Combine(N, DAG); case Intrinsic::aarch64_sve_stnt1: return performSTNT1Combine(N, DAG); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4479,6 +4479,22 @@ } } + if (Operand.getOpcode() == ISD::SPLAT_VECTOR) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getOperand(0))) { + const APInt &Val = C->getAPIntValue(); + switch (Opcode) { + default: break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + return getConstant(Val.zextOrTrunc(VT.getScalarSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + case ISD::SIGN_EXTEND: + return getConstant(Val.sextOrTrunc(VT.getScalarSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + } + } + } + unsigned OpOpcode = 
Operand.getNode()->getOpcode(); switch (Opcode) { case ISD::TokenFactor: @@ -8914,6 +8930,7 @@ MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal(); MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable(); MemSDNodeBits.IsInvariant = MMO->isInvariant(); + MemSDNodeBits.IsNonFaulting = MMO->isNonFaulting(); // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address Index: llvm/lib/CodeGen/MachineOperand.cpp =================================================================== --- llvm/lib/CodeGen/MachineOperand.cpp +++ llvm/lib/CodeGen/MachineOperand.cpp @@ -1089,6 +1089,8 @@ if (getFlags() & MachineMemOperand::MOTargetFlag3) OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3) << "\" "; + if (isNonFaulting()) + OS << "non-faulting "; assert((isLoad() || isStore()) && "machine memory operand must be a load or store (or both)"); Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -775,6 +775,12 @@ LLVMPointerTo<0>], [IntrReadMem, IntrArgMemOnly]>; + class AdvSIMD_1Vec_PredFaultingLoad_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMPointerToElt<0>], + [IntrReadMem, IntrArgMemOnly]>; + class AdvSIMD_1Vec_PredStore_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, @@ -1070,6 +1076,8 @@ def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic; +def int_aarch64_sve_ldnf1 : AdvSIMD_1Vec_PredFaultingLoad_Intrinsic; + // // Stores // Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -548,8 +548,9 @@ uint16_t IsNonTemporal : 1; uint16_t IsDereferenceable : 1; uint16_t IsInvariant : 1; + 
uint16_t IsNonFaulting : 1; }; - enum { NumMemSDNodeBits = NumSDNodeBits + 4 }; + enum { NumMemSDNodeBits = NumSDNodeBits + 5 }; class LSBaseSDNodeBitfields { friend class LSBaseSDNode; @@ -1321,6 +1322,7 @@ bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; } bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; } bool isInvariant() const { return MemSDNodeBits.IsInvariant; } + bool isNonFaulting() const { return MemSDNodeBits.IsNonFaulting; } // Returns the offset from the location of the access. int64_t getSrcValueOffset() const { return MMO->getOffset(); } Index: llvm/include/llvm/CodeGen/MachineMemOperand.h =================================================================== --- llvm/include/llvm/CodeGen/MachineMemOperand.h +++ llvm/include/llvm/CodeGen/MachineMemOperand.h @@ -150,7 +150,10 @@ MOTargetFlag2 = 1u << 7, MOTargetFlag3 = 1u << 8, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ MOTargetFlag3) + // The memory access is non-faulting + MONonFaulting = 1u << 9, + + LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ MONonFaulting) }; private: @@ -261,6 +264,7 @@ bool isNonTemporal() const { return FlagVals & MONonTemporal; } bool isDereferenceable() const { return FlagVals & MODereferenceable; } bool isInvariant() const { return FlagVals & MOInvariant; } + bool isNonFaulting() const { return FlagVals & MONonFaulting; } /// Returns true if this operation has an atomic ordering requirement of /// unordered or higher, false otherwise.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits