================
@@ -6995,6 +7002,122 @@ static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Rebuild a tree of AND/OR/XOR nodes directly in the wider type \p VT so that
+// the truncates feeding it become unnecessary. For example, with x, y and z
+// of type \p VT,
+//   or (and (truncate x), (truncate y)),
+//      (xor (truncate z), (build_vector (constants)))
+// is rewritten to
+//   or (and x, y), (xor z, zext (build_vector (constants)))
+// The left operand of every logic node must be either another promotable
+// logic node or a truncate from \p VT. The right operand may additionally be
+// a single-use extend-vector-inreg node (only for 256-bit \p VT with LASX),
+// which is re-created at \p VT, or a value whose zero-extension to \p VT can
+// be constant folded. An empty SDValue is returned when \p N cannot be
+// promoted or when \p Depth reaches SelectionDAG::MaxRecursionDepth.
+static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
+                                     SelectionDAG &DAG,
+                                     const LoongArchSubtarget &Subtarget,
+                                     unsigned Depth) {
+  // Cap the recursion depth so that compile time stays bounded.
+  if (Depth >= SelectionDAG::MaxRecursionDepth)
+    return SDValue();
+
+  const unsigned Opc = N.getOpcode();
+  if (!ISD::isBitwiseLogicOp(Opc))
+    return SDValue();
+
+  SDValue LHS = N.getOperand(0);
+  SDValue RHS = N.getOperand(1);
+
+  // The logic operation itself must be legal (or promotable) on the wide type.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrPromote(Opc, VT))
+    return SDValue();
+
+  // Matches (truncate X) where X already has type VT.
+  auto IsTruncFromVT = [VT](SDValue V) {
+    return V.getOpcode() == ISD::TRUNCATE &&
+           V.getOperand(0).getValueType() == VT;
+  };
+
+  // Left operand: a promotable logic subtree, otherwise it has to be a
+  // 'trunc' from VT.
+  if (SDValue WideLHS =
+          PromoteMaskArithmetic(LHS, DL, VT, DAG, Subtarget, Depth + 1))
+    LHS = WideLHS;
+  else if (IsTruncFromVT(LHS))
+    LHS = LHS.getOperand(0);
+  else
+    return SDValue();
+
+  // Right operand: a promotable logic subtree, a 'trunc' from VT, an existing
+  // in-register extension we can extend further, or a (foldable) constant.
+  if (SDValue WideRHS =
+          PromoteMaskArithmetic(RHS, DL, VT, DAG, Subtarget, Depth + 1))
+    RHS = WideRHS;
+  else if (IsTruncFromVT(RHS))
+    RHS = RHS.getOperand(0);
+  else if (ISD::isExtVecInRegOpcode(RHS.getOpcode()) &&
+           VT.is256BitVector() && Subtarget.hasExtLASX() && RHS.hasOneUse())
+    RHS = DAG.getNode(RHS.getOpcode(), DL, VT, RHS.getOperand(0));
+  // On 32-bit platform, i64 is an illegal integer scalar type, and
+  // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
+  // future.
+  else if (SDValue Cst =
+               DAG.FoldConstantArithmetic(ISD::ZERO_EXTEND, DL, VT, {RHS}))
+    RHS = Cst;
+  else
+    return SDValue();
+
+  return DAG.getNode(Opc, DL, VT, LHS, RHS);
+}
+
+// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8,
+// which is an LSX-sized register. In most cases we actually compare or select
+// LASX-sized registers and mixing the two types creates horrible code. This
+// method optimizes some of the transition sequences.
+static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
+                                     SelectionDAG &DAG,
+                                     const LoongArchSubtarget &Subtarget) {
+  // N is expected to be a vector any/zero/sign extend node.
+  const unsigned ExtOpc = N.getOpcode();
+  EVT VT = N.getValueType();
+  assert(VT.isVector() && "Expected vector type");
+  assert((ExtOpc == ISD::ANY_EXTEND || ExtOpc == ISD::ZERO_EXTEND ||
+          ExtOpc == ISD::SIGN_EXTEND) &&
+         "Invalid Node");
+
+  // Only 256-bit results on subtargets with LASX are handled.
+  if (!(Subtarget.hasExtLASX() && VT.is256BitVector()))
+    return SDValue();
+
+  SDValue Narrow = N.getOperand(0);
+  EVT NarrowVT = Narrow.getValueType();
+
+  // Try to rebuild the narrow logic tree directly in the wide type.
+  SDValue Wide = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
+  if (!Wide)
+    return SDValue();
+
+  // Re-apply the semantics of the original extend on the wide value: nothing
+  // for any-extend, an in-register zero/sign extension from NarrowVT
+  // otherwise.
+  if (ExtOpc == ISD::ANY_EXTEND)
+    return Wide;
+  if (ExtOpc == ISD::ZERO_EXTEND)
+    return DAG.getZeroExtendInReg(Wide, DL, NarrowVT);
+  if (ExtOpc == ISD::SIGN_EXTEND)
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Wide,
+                       DAG.getValueType(NarrowVT));
+  llvm_unreachable("Unexpected opcode");
+}
+
+static SDValue performANY_EXTENDCombine(SDNode *N, SelectionDAG &DAG,
----------------
wangleiat wrote:

Small naming suggestion: `performANY_EXTENDCombine` handles `ANY`, `ZERO`, and 
`SIGN` extends, not just `ANY`. Renaming it to `performExtendCombine` would 
better reflect what it does.

https://github.com/llvm/llvm-project/pull/163523
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to