https://github.com/UsmanNadeem updated
https://github.com/llvm/llvm-project/pull/77555
>From 7eeacff38b6d95fb2eb0fe13cad660801e7982fd Mon Sep 17 00:00:00 2001
From: "Nadeem, Usman"
Date: Tue, 9 Jan 2024 20:20:10 -0800
Subject: [PATCH 1/2] [AArch64][SVE2] Lower OR to SLI/SRI
Code builds on NEON code and the tests are adapted from NEON tests
minus the tests for illegal types.
Change-Id: I11325949700fb7433f948bbe3e82dbc71696aecc
---
.../Target/AArch64/AArch64ISelLowering.cpp| 152 ++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 4 +-
llvm/lib/Target/AArch64/AArch64Subtarget.h| 1 +
llvm/test/CodeGen/AArch64/sve2-sli-sri.ll | 263 ++
4 files changed, 357 insertions(+), 63 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sve2-sli-sri.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 102fd0c3dae2ab..269dde004bea78 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1358,6 +1358,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (!Subtarget->isLittleEndian())
setOperationAction(ISD::BITCAST, VT, Expand);
+
+ if (Subtarget->hasSVE2orSME())
+// For SLI/SRI.
+setOperationAction(ISD::OR, VT, Custom);
}
// Illegal unpacked integer vector types.
@@ -5411,7 +5415,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::aarch64_neon_vsri:
- case Intrinsic::aarch64_neon_vsli: {
+ case Intrinsic::aarch64_neon_vsli:
+ case Intrinsic::aarch64_sve_sri:
+ case Intrinsic::aarch64_sve_sli: {
EVT Ty = Op.getValueType();
if (!Ty.isVector())
@@ -5419,7 +5425,8 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
-bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
+bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
+IntNo == Intrinsic::aarch64_sve_sri;
unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
@@ -12544,6 +12551,53 @@ static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
return true;
}
+/// Returns true when predicate \p N is known to have no active lanes.
+///
+/// Any chain of AArch64ISD::REINTERPRET_CAST nodes wrapping \p N is stripped
+/// first; the underlying node must then be a constant splat vector of all
+/// zeros for the predicate to count as all-inactive.
+static bool isAllInactivePredicate(SDValue N) {
+ // Look through cast.
+ while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
+N = N.getOperand(0);
+
+ return ISD::isConstantSplatVectorAllZeros(N.getNode());
+}
+
+/// Returns true when predicate \p N is known to have every lane active.
+///
+/// The element count of \p N as passed in is the reference: reinterpret casts
+/// are looked through only while the source type has at least that many
+/// elements. The underlying node is accepted when it is an all-ones constant
+/// splat, a "ptrue ..., all" with at least as many elements, or, when the
+/// subtarget's minimum and maximum SVE vector sizes are equal and non-zero, a
+/// PTRUE whose pattern element count equals the runtime element count.
+///
+/// \param DAG Used only to query the AArch64 subtarget's SVE vector size bounds.
+/// \param N   The predicate value to inspect.
+static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
+  unsigned NumElts = N.getValueType().getVectorMinNumElements();
+
+  // Look through cast.
+  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
+    N = N.getOperand(0);
+    // When reinterpreting from a type with fewer elements the "new" elements
+    // are not active, so bail if they're likely to be used.
+    if (N.getValueType().getVectorMinNumElements() < NumElts)
+      return false;
+  }
+
+  if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
+    return true;
+
+  // "ptrue p<ty>, all" can be considered all active when <ty> is the same size
+  // or smaller than the implicit element type represented by N.
+  // NOTE: A larger element count implies a smaller element type.
+  if (N.getOpcode() == AArch64ISD::PTRUE &&
+      N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
+    return N.getValueType().getVectorMinNumElements() >= NumElts;
+
+  // If we're compiling for a specific vector-length, we can check if the
+  // pattern's VL equals that of the scalable vector at runtime.
+  if (N.getOpcode() == AArch64ISD::PTRUE) {
+    const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+    unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+    unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+    // Min == Max (and non-zero) means the vector length is fixed, so vscale
+    // can be computed here.
+    if (MaxSVESize && MinSVESize == MaxSVESize) {
+      unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
+      unsigned PatNumElts =
+          getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
+      return PatNumElts == (NumElts * VScale);
+    }
+  }
+
+  return false;
+}
+
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
// BUILD_VECTORs with constant element C1, C2 is a constant, and:
@@ -12569,32 +12623,52 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
 // Is one of the operands an AND or a BICi? The AND may have been optimised to
 // a BICi in order to use an immediate instead of a register.
// Is the other operand an shl or lshr? This will have been turned into:
- // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
+ // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift
+ // or (AArch64ISD::SHL_PRED || AArch64ISD::SRL_PRED) mask, vector, #shiftVec.