This revision was automatically updated to reflect the committed changes.
Closed by commit rG633db60f3ed0: [AArch64][SVE] Add SVE index intrinsic (authored by kmclaughlin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74550/new/

https://reviews.llvm.org/D74550

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
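For quick reference: the new llvm.aarch64.sve.index.* intrinsic takes a scalar start and a scalar step of the element type and returns the scalable vector { start, start+step, start+2*step, ... }, selected to the SVE INDEX instruction. A minimal IR sketch, mirroring the tests added below (the function name here is illustrative only):

  declare <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32, i32)

  define <vscale x 4 x i32> @example(i32 %base, i32 %step) {
    ; Selected to: index z0.s, w0, w1
    %seq = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 %base, i32 %step)
    ret <vscale x 4 x i32> %seq
  }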
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
@@ -0,0 +1,178 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; INDEX (IMMEDIATES)
+;
+
+define <vscale x 16 x i8> @index_ii_i8() {
+; CHECK-LABEL: index_ii_i8:
+; CHECK: index z0.b, #-16, #15
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 -16, i8 15)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @index_ii_i16() {
+; CHECK-LABEL: index_ii_i16:
+; CHECK: index z0.h, #15, #-16
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 15, i16 -16)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @index_ii_i32() {
+; CHECK-LABEL: index_ii_i32:
+; CHECK: index z0.s, #-16, #15
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -16, i32 15)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @index_ii_i64() {
+; CHECK-LABEL: index_ii_i64:
+; CHECK: index z0.d, #15, #-16
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 15, i64 -16)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @index_ii_range() {
+; CHECK-LABEL: index_ii_range:
+; CHECK: mov w8, #16
+; CHECK-NEXT: mov x9, #-17
+; CHECK-NEXT: index z0.d, x9, x8
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; INDEX (IMMEDIATE, SCALAR)
+;
+
+define <vscale x 16 x i8> @index_ir_i8(i8 %a) {
+; CHECK-LABEL: index_ir_i8:
+; CHECK: index z0.b, #15, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 15, i8 %a)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @index_ir_i16(i16 %a) {
+; CHECK-LABEL: index_ir_i16:
+; CHECK: index z0.h, #-16, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 -16, i16 %a)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @index_ir_i32(i32 %a) {
+; CHECK-LABEL: index_ir_i32:
+; CHECK: index z0.s, #15, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 15, i32 %a)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @index_ir_i64(i64 %a) {
+; CHECK-LABEL: index_ir_i64:
+; CHECK: index z0.d, #-16, x0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -16, i64 %a)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @index_ir_range(i32 %a) {
+; CHECK-LABEL: index_ir_range:
+; CHECK: mov w8, #-17
+; CHECK: index z0.s, w8, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -17, i32 %a)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; INDEX (SCALAR, IMMEDIATE)
+;
+
+define <vscale x 16 x i8> @index_ri_i8(i8 %a) {
+; CHECK-LABEL: index_ri_i8:
+; CHECK: index z0.b, w0, #-16
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 -16)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @index_ri_i16(i16 %a) {
+; CHECK-LABEL: index_ri_i16:
+; CHECK: index z0.h, w0, #15
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 15)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @index_ri_i32(i32 %a) {
+; CHECK-LABEL: index_ri_i32:
+; CHECK: index z0.s, w0, #-16
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 -16)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @index_ri_i64(i64 %a) {
+; CHECK-LABEL: index_ri_i64:
+; CHECK: index z0.d, x0, #15
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 15)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x i16> @index_ri_range(i16 %a) {
+; CHECK-LABEL: index_ri_range:
+; CHECK: mov w8, #16
+; CHECK: index z0.h, w0, w8
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 16)
+  ret <vscale x 8 x i16> %out
+}
+
+;
+; INDEX (SCALARS)
+;
+
+define <vscale x 16 x i8> @index_rr_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: index_rr_i8:
+; CHECK: index z0.b, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @index_rr_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: index_rr_i16:
+; CHECK: index z0.h, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @index_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: index_rr_i32:
+; CHECK: index z0.s, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @index_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: index_rr_i64:
+; CHECK: index z0.d, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8, i8)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16, i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64, i64)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4386,11 +4386,20 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ii<string asm> {
-  def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_32b>;
-  def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_32b>;
+multiclass sve_int_index_ii<string asm, SDPatternOperator op> {
+  def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>;
+  def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>;
   def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>;
   def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>;
+
+  def : Pat<(nxv16i8 (op simm5_8b:$imm5, simm5_8b:$imm5b)),
+            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>;
+  def : Pat<(nxv8i16 (op simm5_16b:$imm5, simm5_16b:$imm5b)),
+            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>;
+  def : Pat<(nxv4i32 (op simm5_32b:$imm5, simm5_32b:$imm5b)),
+            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
+  def : Pat<(nxv2i64 (op simm5_64b:$imm5, simm5_64b:$imm5b)),
+            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
 }
 
 class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4410,11 +4419,20 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ir<string asm> {
-  def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_32b>;
-  def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_32b>;
+multiclass sve_int_index_ir<string asm, SDPatternOperator op> {
+  def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>;
+  def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>;
   def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
   def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>;
+
+  def : Pat<(nxv16i8 (op simm5_8b:$imm5, GPR32:$Rm)),
+            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
+  def : Pat<(nxv8i16 (op simm5_16b:$imm5, GPR32:$Rm)),
+            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
+  def : Pat<(nxv4i32 (op simm5_32b:$imm5, GPR32:$Rm)),
+            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
+  def : Pat<(nxv2i64 (op simm5_64b:$imm5, GPR64:$Rm)),
+            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
 }
 
 class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4434,11 +4452,20 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ri<string asm> {
-  def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_32b>;
-  def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_32b>;
+multiclass sve_int_index_ri<string asm, SDPatternOperator op> {
+  def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>;
+  def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>;
   def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>;
   def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>;
+
+  def : Pat<(nxv16i8 (op GPR32:$Rm, simm5_8b:$imm5)),
+            (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>;
+  def : Pat<(nxv8i16 (op GPR32:$Rm, simm5_16b:$imm5)),
+            (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>;
+  def : Pat<(nxv4i32 (op GPR32:$Rm, simm5_32b:$imm5)),
+            (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
+  def : Pat<(nxv2i64 (op GPR64:$Rm, simm5_64b:$imm5)),
+            (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
 }
 
 class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4458,11 +4485,16 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_rr<string asm> {
+multiclass sve_int_index_rr<string asm, SDPatternOperator op> {
   def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>;
   def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>;
   def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
   def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>;
+
+  def : SVE_2_Op_Pat<nxv16i8, op, i32, i32, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i16, op, i32, i32, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, i32, i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, i64, i64, !cast<Instruction>(NAME # _D)>;
 }
 //
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
===================================================================
--- llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -56,6 +56,9 @@
                     raw_ostream &O);
   void printImmHex(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                    raw_ostream &O);
+  template <int Size>
+  void printSImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
   template <typename T> void printImmSVE(T Value, raw_ostream &O);
   void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
                            raw_ostream &O);
Index: llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
===================================================================
--- llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -900,6 +900,19 @@
   O << format("#%#llx", Op.getImm());
 }
 
+template<int Size>
+void AArch64InstPrinter::printSImm(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Size == 8)
+    O << "#" << formatImm((signed char)Op.getImm());
+  else if (Size == 16)
+    O << "#" << formatImm((signed short)Op.getImm());
+  else
+    O << "#" << formatImm(Op.getImm());
+}
+
 void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
                                              unsigned Imm, raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -99,6 +99,9 @@
 def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCVecEltisVT<2,i1>]>;
 def AArch64dup_pred : SDNode<"AArch64ISD::DUP_PRED", SDT_AArch64DUP_PRED>;
 
+def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;
+def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
+
 let Predicates = [HasSVE] in {
 
   defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -961,10 +964,10 @@
   defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
   defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
 
-  defm INDEX_RR : sve_int_index_rr<"index">;
-  defm INDEX_IR : sve_int_index_ir<"index">;
-  defm INDEX_RI : sve_int_index_ri<"index">;
-  defm INDEX_II : sve_int_index_ii<"index">;
+  defm INDEX_RR : sve_int_index_rr<"index", index_vector>;
+  defm INDEX_IR : sve_int_index_ir<"index", index_vector>;
+  defm INDEX_RI : sve_int_index_ri<"index", index_vector>;
+  defm INDEX_II : sve_int_index_ii<"index", index_vector>;
 
   // Unpredicated shifts
   defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr">;
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -330,6 +330,18 @@
   let DecoderMethod = "DecodeSImm<5>";
 }
 
+def simm5_8b : Operand<i32>, ImmLeaf<i32, [{ return (int8_t)Imm >= -16 && (int8_t)Imm < 16; }]> {
+  let ParserMatchClass = SImm5Operand;
+  let DecoderMethod = "DecodeSImm<5>";
+  let PrintMethod = "printSImm<8>";
+}
+
+def simm5_16b : Operand<i32>, ImmLeaf<i32, [{ return (int16_t)Imm >= -16 && (int16_t)Imm < 16; }]> {
+  let ParserMatchClass = SImm5Operand;
+  let DecoderMethod = "DecodeSImm<5>";
+  let PrintMethod = "printSImm<16>";
+}
+
 // simm7sN predicate - True if the immediate is a multiple of N in the range
 // [-64 * N, 63 * N].
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -216,6 +216,7 @@
   PTRUE,
 
   DUP_PRED,
+  INDEX_VECTOR,
 
   LDNF1,
   LDNF1S,
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1426,6 +1426,7 @@
   case AArch64ISD::STP:               return "AArch64ISD::STP";
   case AArch64ISD::STNP:              return "AArch64ISD::STNP";
   case AArch64ISD::DUP_PRED:          return "AArch64ISD::DUP_PRED";
+  case AArch64ISD::INDEX_VECTOR:      return "AArch64ISD::INDEX_VECTOR";
   }
   return nullptr;
 }
@@ -10918,6 +10919,21 @@
   return SDValue();
 }
 
+static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  SDValue Op1 = N->getOperand(1);
+  SDValue Op2 = N->getOperand(2);
+  EVT ScalarTy = Op1.getValueType();
+
+  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) {
+    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+    Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+  }
+
+  return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, N->getValueType(0),
+                     Op1, Op2);
+}
+
 static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
   SDLoc dl(N);
   SDValue Scalar = N->getOperand(3);
@@ -11118,6 +11134,8 @@
     return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG);
   case Intrinsic::aarch64_sve_andv:
     return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG);
+  case Intrinsic::aarch64_sve_index:
+    return LowerSVEIntrinsicIndex(N, DAG);
   case Intrinsic::aarch64_sve_dup:
     return LowerSVEIntrinsicDUP(N, DAG);
   case Intrinsic::aarch64_sve_ext:
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -796,6 +796,12 @@
                 LLVMPointerTo<0>],
                [IntrArgMemOnly, NoCapture<2>]>;
 
+  class AdvSIMD_SVE_Index_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMVectorElementType<0>,
+                 LLVMVectorElementType<0>],
+                [IntrNoMem]>;
+
   class AdvSIMD_Merged1VectorArg_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMMatchType<0>,
@@ -1237,6 +1243,8 @@
 
 def int_aarch64_sve_dup : AdvSIMD_SVE_DUP_Intrinsic;
 
+def int_aarch64_sve_index : AdvSIMD_SVE_Index_Intrinsic;
+
 //
 // Integer arithmetic
 //