[clang-tools-extra] [llvm] [clang] [AArch64][SVE2] Lower OR to SLI/SRI (PR #77555)

2024-01-11 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/77555
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [AArch64][SVE2] Lower OR to SLI/SRI (PR #77555)

2024-01-11 Thread Usman Nadeem via cfe-commits

https://github.com/UsmanNadeem updated 
https://github.com/llvm/llvm-project/pull/77555

>From 7eeacff38b6d95fb2eb0fe13cad660801e7982fd Mon Sep 17 00:00:00 2001
From: "Nadeem, Usman" 
Date: Tue, 9 Jan 2024 20:20:10 -0800
Subject: [PATCH 1/2] [AArch64][SVE2] Lower OR to SLI/SRI

Code builds on NEON code and the tests are adapted from NEON tests
minus the tests for illegal types.

Change-Id: I11325949700fb7433f948bbe3e82dbc71696aecc
---
 .../Target/AArch64/AArch64ISelLowering.cpp| 152 ++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   4 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.h|   1 +
 llvm/test/CodeGen/AArch64/sve2-sli-sri.ll | 263 ++
 4 files changed, 357 insertions(+), 63 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve2-sli-sri.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 102fd0c3dae2ab..269dde004bea78 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1358,6 +1358,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   if (!Subtarget->isLittleEndian())
 setOperationAction(ISD::BITCAST, VT, Expand);
+
+  if (Subtarget->hasSVE2orSME())
+// For SLI/SRI.
+setOperationAction(ISD::OR, VT, Custom);
 }
 
 // Illegal unpacked integer vector types.
@@ -5411,7 +5415,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   }
 
   case Intrinsic::aarch64_neon_vsri:
-  case Intrinsic::aarch64_neon_vsli: {
+  case Intrinsic::aarch64_neon_vsli:
+  case Intrinsic::aarch64_sve_sri:
+  case Intrinsic::aarch64_sve_sli: {
 EVT Ty = Op.getValueType();
 
 if (!Ty.isVector())
@@ -5419,7 +5425,8 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
 assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
 
-bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
+bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
+IntNo == Intrinsic::aarch64_sve_sri;
 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
@@ -12544,6 +12551,53 @@ static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
   return true;
 }
 
+static bool isAllInactivePredicate(SDValue N) {
+  // Look through cast.
+  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
+N = N.getOperand(0);
+
+  return ISD::isConstantSplatVectorAllZeros(N.getNode());
+}
+
+static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
+  unsigned NumElts = N.getValueType().getVectorMinNumElements();
+
+  // Look through cast.
+  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
+N = N.getOperand(0);
+// When reinterpreting from a type with fewer elements the "new" elements
+// are not active, so bail if they're likely to be used.
+if (N.getValueType().getVectorMinNumElements() < NumElts)
+  return false;
+  }
+
+  if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
+return true;
+
+  // "ptrue p.<Ty>, all" can be considered all active when <Ty> is the same size
+  // or smaller than the implicit element type represented by N.
+  // NOTE: A larger element count implies a smaller element type.
+  if (N.getOpcode() == AArch64ISD::PTRUE &&
+  N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
+return N.getValueType().getVectorMinNumElements() >= NumElts;
+
+  // If we're compiling for a specific vector-length, we can check if the
+  // pattern's VL equals that of the scalable vector at runtime.
+  if (N.getOpcode() == AArch64ISD::PTRUE) {
+    const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+if (MaxSVESize && MinSVESize == MaxSVESize) {
+  unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
+  unsigned PatNumElts =
+  getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
+  return PatNumElts == (NumElts * VScale);
+}
+  }
+
+  return false;
+}
+
 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
 // BUILD_VECTORs with constant element C1, C2 is a constant, and:
@@ -12569,32 +12623,52 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
   // Is one of the operands an AND or a BICi? The AND may have been optimised 
to
   // a BICi in order to use an immediate instead of a register.
   // Is the other operand an shl or lshr? This will have been turned into:
-  // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
+  // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift
+  // or (AArch64ISD::SHL_PRED || AArch64ISD::SRL_PRED) mask, vector, #shiftVec.