https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/170246
None >From 1ca697196f5dd7bcc139304c67f9d1619f2c934d Mon Sep 17 00:00:00 2001 From: Qi Zhao <[email protected]> Date: Tue, 2 Dec 2025 10:32:22 +0800 Subject: [PATCH 1/2] [LoongArch] Custom legalize for 256-bit vector trunc (2/2) --- .../LoongArch/LoongArchISelLowering.cpp | 30 +++++++++++++++++++ .../Target/LoongArch/LoongArchISelLowering.h | 1 + 2 files changed, 31 insertions(+) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 5a3b53437a750..f2a55d9bf1469 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -618,6 +618,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerVECREDUCE(Op, DAG); case ISD::ConstantFP: return lowerConstantFP(Op, DAG); + case ISD::TRUNCATE: + return lowerTRUNCATE(Op, DAG); } return SDValue(); } @@ -675,6 +677,34 @@ static SDValue isNOT(SDValue V, SelectionDAG &DAG) { return SDValue(); } +SDValue LoongArchTargetLowering::lowerTRUNCATE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + unsigned NumElts = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // Only need to consider v4i64->v4i32, v8i32->v8i16 and v16i16->v16i8. + if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) + return SDValue(); + if (SrcVT != MVT::v4i64 && SrcVT != MVT::v8i32 && SrcVT != MVT::v16i16) + return SDValue(); + + unsigned WidenNumElts = NumElts * 2; + SmallVector<int, 32> Mask(WidenNumElts, -1); + for (unsigned i = 0; i < NumElts; ++i) + Mask[i] = 2 * i; + + MVT NewVT = MVT::getVectorVT(EltVT, WidenNumElts); + SDValue CastSrc = DAG.getBitcast(NewVT, Src); + SDValue Result = DAG.getVectorShuffle(NewVT, DL, CastSrc, CastSrc, Mask); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result, + DAG.getVectorIdxConstant(0, DL)); +} + SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 0c09fb6afd2d1..6596bfe447c3e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -240,6 +240,7 @@ class LoongArchTargetLowering : public TargetLowering { SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; >From cc1d1257719cda60f6d84bcc49875807e80789b7 Mon Sep 17 00:00:00 2001 From: Qi Zhao <[email protected]> Date: Tue, 2 Dec 2025 10:33:00 +0800 Subject: [PATCH 2/2] update tests --- llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll | 140 +++--------------- llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll | 52 +++---- 2 files changed, 39 insertions(+), 153 deletions(-) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll index b5950fd55606e..108b77ba78e89 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll @@ -6,29 +6,19 @@ define void @trunc_v4i64_to_v4i32(ptr %res, ptr %a) nounwind { ; LA32-LABEL: trunc_v4i64_to_v4i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3 -; LA32-NEXT: vst $vr1, $a0, 0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0) +; LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI0_0) +; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1 +; LA32-NEXT: vst $vr0, $a0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: trunc_v4i64_to_v4i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0 -; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1 -; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2 -; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3 -; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3 -; LA64-NEXT: vst $vr1, $a0, 0 +; LA64-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0) +; LA64-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI0_0) +; LA64-NEXT: xvperm.w $xr0, $xr0, $xr1 +; LA64-NEXT: vst $vr0, $a0, 0 ; LA64-NEXT: ret entry: %v = load <4 x i64>, ptr %a @@ -99,45 +89,17 @@ define void @trunc_v8i32_to_v8i16(ptr %res, ptr %a) nounwind { ; LA32-LABEL: trunc_v8i32_to_v8i16: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2 -; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3 -; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 3 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4 -; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 4 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5 -; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 5 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6 -; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 6 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7 -; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 7 -; LA32-NEXT: vst $vr1, $a0, 0 +; LA32-NEXT: xvpermi.d $xr1, $xr0, 78 +; LA32-NEXT: xvpickev.h $xr0, $xr1, $xr0 +; LA32-NEXT: vst $vr0, $a0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: trunc_v8i32_to_v8i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 1 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2 -; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 2 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3 -; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 3 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4 -; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 4 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 5 -; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 5 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 6 -; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 6 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 7 -; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 7 -; LA64-NEXT: vst $vr1, $a0, 0 +; LA64-NEXT: xvpermi.d $xr1, $xr0, 78 +; LA64-NEXT: xvpickev.h $xr0, $xr1, $xr0 +; LA64-NEXT: vst $vr0, $a0, 0 ; LA64-NEXT: ret entry: %v = load <8 x i32>, ptr %a @@ -180,79 +142,17 @@ define void @trunc_v16i16_to_v16i8(ptr %res, ptr %a) nounwind { ; LA32-LABEL: trunc_v16i16_to_v16i8: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 0 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 0 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 1 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 1 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 2 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 2 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 3 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 3 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 4 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 4 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 5 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 5 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 6 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 6 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 7 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 7 -; LA32-NEXT: xvpermi.d $xr0, $xr0, 14 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 0 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 8 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 1 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 9 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 2 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 10 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 3 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 11 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 4 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 12 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 5 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 13 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 6 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 14 -; LA32-NEXT: vpickve2gr.h $a1, $vr0, 7 -; LA32-NEXT: vinsgr2vr.b $vr1, $a1, 15 -; LA32-NEXT: vst $vr1, $a0, 0 +; LA32-NEXT: xvpermi.d $xr1, $xr0, 78 +; LA32-NEXT: xvpickev.b $xr0, $xr1, $xr0 +; LA32-NEXT: vst $vr0, $a0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: trunc_v16i16_to_v16i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 0 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 0 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 1 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 1 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 2 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 2 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 3 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 3 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 4 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 4 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 5 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 5 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 6 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 6 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 7 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 7 -; LA64-NEXT: xvpermi.d $xr0, $xr0, 14 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 0 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 8 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 1 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 9 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 2 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 10 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 3 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 11 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 4 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 12 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 5 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 13 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 6 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 14 -; LA64-NEXT: vpickve2gr.h $a1, $vr0, 7 -; LA64-NEXT: vinsgr2vr.b $vr1, $a1, 15 -; LA64-NEXT: vst $vr1, $a0, 0 +; LA64-NEXT: xvpermi.d $xr1, $xr0, 78 +; LA64-NEXT: xvpickev.b $xr0, $xr1, $xr0 +; LA64-NEXT: vst $vr0, $a0, 0 ; LA64-NEXT: ret entry: %v = load <16 x i16>, ptr %a diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll index 09908f619fa1f..75c71dffd21c8 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll @@ -884,39 +884,25 @@ define i8 @xvmsk_ne_v4i32_concat_poison(<4 x i32> %vec) { } define i8 @xvmsk_ogt_v4f64_concat_poison(<4 x double> %vec) { -; LA32-LABEL: xvmsk_ogt_v4f64_concat_poison: -; LA32: # %bb.0: -; LA32-NEXT: xvrepli.b $xr1, 0 -; LA32-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 6 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4 -; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2 -; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 0 -; LA32-NEXT: vinsgr2vr.h $vr0, $a3, 0 -; LA32-NEXT: vinsgr2vr.h $vr0, $a2, 1 -; LA32-NEXT: vinsgr2vr.h $vr0, $a1, 2 -; LA32-NEXT: vinsgr2vr.h $vr0, $a0, 3 -; LA32-NEXT: vslli.h $vr0, $vr0, 15 -; LA32-NEXT: vmskltz.h $vr0, $vr0 -; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: xvmsk_ogt_v4f64_concat_poison: -; LA64: # %bb.0: -; LA64-NEXT: xvrepli.b $xr1, 0 -; LA64-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2 -; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 1 -; LA64-NEXT: xvpickve2gr.d $a3, $xr0, 0 -; LA64-NEXT: vinsgr2vr.h $vr0, $a3, 0 -; LA64-NEXT: vinsgr2vr.h $vr0, $a2, 1 -; LA64-NEXT: vinsgr2vr.h $vr0, $a1, 2 -; LA64-NEXT: vinsgr2vr.h $vr0, $a0, 3 -; LA64-NEXT: vslli.h $vr0, $vr0, 15 -; LA64-NEXT: vmskltz.h $vr0, $vr0 -; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: xvmsk_ogt_v4f64_concat_poison: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI34_0) +; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI34_0) +; CHECK-NEXT: xvrepli.b $xr2, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr2, $xr0 +; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3 +; CHECK-NEXT: vslli.h $vr0, $vr1, 15 +; CHECK-NEXT: vmskltz.h $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: ret %tobool = fcmp ogt <4 x double> %vec, zeroinitializer %insertvec = shufflevector <4 x i1> %tobool, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> %res = bitcast <8 x i1> %insertvec to i8 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
