[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
https://github.com/ecnelises closed https://github.com/llvm/llvm-project/pull/86783 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/86783 >From b886dcf2da25417d9f8cd75ff4aa58686e35139d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Wed, 27 Mar 2024 17:11:04 +0800 Subject: [PATCH 1/4] [PowerPC] Implement 32-bit expansion for rldimi rldimi is 64-bit instruction, due to backward compatibility, it needs to be expanded into series of rlwimi in 32-bit environment. In the future, we may improve bit permutation selector and remove such direct codegen. --- clang/lib/Sema/SemaChecking.cpp | 1 - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 109 -- llvm/test/CodeGen/PowerPC/rldimi.ll | 366 3 files changed, 454 insertions(+), 22 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 11401b6f56c0ea..d2cbe5417d682d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: - case PPC::BI__builtin_ppc_rldimi: return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7436b202fba0d9..3281a0dfd08729 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); @@ -10757,6 +10758,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return true; } +static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME) { + assert(SH < 32 && MB < 32 && ME < 32 && + "Invalid argument for rotate insert!"); + return SDValue( + DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, + {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), + DAG.getTargetConstant(MB, Loc, MVT::i32), + DAG.getTargetConstant(ME, Loc, MVT::i32)}), + 0); +} + +static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME, bool IsPPC64) { + assert(SH < 64 && MB < 64 && ME < 64 && + "Invalid argument for rotate insert!"); + if (IsPPC64) { +// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. +if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); +} else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); +} +return SDValue(DAG.getMachineNode( + PPC::RLDIMI, Loc, MVT::i64, + {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), +DAG.getTargetConstant(MB, Loc, MVT::i32)}), + 0); + } + + // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH + // is adjusted to simplify cases. Invalid ranges will be skipped. 
+ // - SrcHi inserted into DstHi with [0, 32-SH) + // - SrcLo inserted into DstHi with [32-SH, 32) + // - SrcHi inserted into DstLo with [32, 64-SH) + // - SrcLo inserted into DstLo with [64-SH, 64) + auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32); + auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32); + if (SH >= 32) { +SH -= 32; +std::swap(SrcLo, SrcHi); + } + auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, + SDValue Src, SDValue Dst, unsigned MB, + unsigned ME) { +if (Left > Right) + return Dst; + +if (MB <= ME) { + if (MB <= Right && ME >= Left) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, + std::max(MB, Left) % 32, + std::min(ME, Right) % 32); +} else { + if (MB < Left || ME > Right) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32); + + if (MB <= Right && ME < Left) +return getRotateInsert32(DAG, Loc
[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/86783 >From b886dcf2da25417d9f8cd75ff4aa58686e35139d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Wed, 27 Mar 2024 17:11:04 +0800 Subject: [PATCH 1/3] [PowerPC] Implement 32-bit expansion for rldimi rldimi is 64-bit instruction, due to backward compatibility, it needs to be expanded into series of rlwimi in 32-bit environment. In the future, we may improve bit permutation selector and remove such direct codegen. --- clang/lib/Sema/SemaChecking.cpp | 1 - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 109 -- llvm/test/CodeGen/PowerPC/rldimi.ll | 366 3 files changed, 454 insertions(+), 22 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 11401b6f56c0ea..d2cbe5417d682d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: - case PPC::BI__builtin_ppc_rldimi: return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7436b202fba0d9..3281a0dfd08729 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); @@ -10757,6 +10758,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return true; } +static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME) { + assert(SH < 32 && MB < 32 && ME < 32 && + "Invalid argument for rotate insert!"); + return SDValue( + DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, + {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), + DAG.getTargetConstant(MB, Loc, MVT::i32), + DAG.getTargetConstant(ME, Loc, MVT::i32)}), + 0); +} + +static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME, bool IsPPC64) { + assert(SH < 64 && MB < 64 && ME < 64 && + "Invalid argument for rotate insert!"); + if (IsPPC64) { +// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. +if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); +} else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); +} +return SDValue(DAG.getMachineNode( + PPC::RLDIMI, Loc, MVT::i64, + {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), +DAG.getTargetConstant(MB, Loc, MVT::i32)}), + 0); + } + + // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH + // is adjusted to simplify cases. Invalid ranges will be skipped. 
+ // - SrcHi inserted into DstHi with [0, 32-SH) + // - SrcLo inserted into DstHi with [32-SH, 32) + // - SrcHi inserted into DstLo with [32, 64-SH) + // - SrcLo inserted into DstLo with [64-SH, 64) + auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32); + auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32); + if (SH >= 32) { +SH -= 32; +std::swap(SrcLo, SrcHi); + } + auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, + SDValue Src, SDValue Dst, unsigned MB, + unsigned ME) { +if (Left > Right) + return Dst; + +if (MB <= ME) { + if (MB <= Right && ME >= Left) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, + std::max(MB, Left) % 32, + std::min(ME, Right) % 32); +} else { + if (MB < Left || ME > Right) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32); + + if (MB <= Right && ME < Left) +return getRotateInsert32(DAG, Loc
[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
https://github.com/ecnelises edited https://github.com/llvm/llvm-project/pull/86783 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/86783 >From b886dcf2da25417d9f8cd75ff4aa58686e35139d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Wed, 27 Mar 2024 17:11:04 +0800 Subject: [PATCH 1/2] [PowerPC] Implement 32-bit expansion for rldimi rldimi is 64-bit instruction, due to backward compatibility, it needs to be expanded into series of rlwimi in 32-bit environment. In the future, we may improve bit permutation selector and remove such direct codegen. --- clang/lib/Sema/SemaChecking.cpp | 1 - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 109 -- llvm/test/CodeGen/PowerPC/rldimi.ll | 366 3 files changed, 454 insertions(+), 22 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 11401b6f56c0ea..d2cbe5417d682d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: - case PPC::BI__builtin_ppc_rldimi: return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7436b202fba0d9..3281a0dfd08729 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); @@ -10757,6 +10758,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return true; } +static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME) { + assert(SH < 32 && MB < 32 && ME < 32 && + "Invalid argument for rotate insert!"); + return SDValue( + DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, + {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), + DAG.getTargetConstant(MB, Loc, MVT::i32), + DAG.getTargetConstant(ME, Loc, MVT::i32)}), + 0); +} + +static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME, bool IsPPC64) { + assert(SH < 64 && MB < 64 && ME < 64 && + "Invalid argument for rotate insert!"); + if (IsPPC64) { +// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. +if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); +} else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); +} +return SDValue(DAG.getMachineNode( + PPC::RLDIMI, Loc, MVT::i64, + {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), +DAG.getTargetConstant(MB, Loc, MVT::i32)}), + 0); + } + + // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH + // is adjusted to simplify cases. Invalid ranges will be skipped. 
+ // - SrcHi inserted into DstHi with [0, 32-SH) + // - SrcLo inserted into DstHi with [32-SH, 32) + // - SrcHi inserted into DstLo with [32, 64-SH) + // - SrcLo inserted into DstLo with [64-SH, 64) + auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32); + auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32); + if (SH >= 32) { +SH -= 32; +std::swap(SrcLo, SrcHi); + } + auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, + SDValue Src, SDValue Dst, unsigned MB, + unsigned ME) { +if (Left > Right) + return Dst; + +if (MB <= ME) { + if (MB <= Right && ME >= Left) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, + std::max(MB, Left) % 32, + std::min(ME, Right) % 32); +} else { + if (MB < Left || ME > Right) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32); + + if (MB <= Right && ME < Left) +return getRotateInsert32(DAG, Loc
[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
bzEq wrote: > due to backward compatibility, it needs to be expanded into series of rlwimi > in 32-bit environment Why must it be a 'series of rlwimi'? Why don't we just expand it following what the ISA describes and let the legalizer generate the code sequence under 32-bit mode? ``` n ← sh5 || sh0:4 r ← ROTL64((RS), n) b ← mb5 || mb0:4 m ← MASK(b, ¬n) RA ← r&m | (RA) & ¬m ``` https://github.com/llvm/llvm-project/pull/86783 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/86783 >From b886dcf2da25417d9f8cd75ff4aa58686e35139d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Wed, 27 Mar 2024 17:11:04 +0800 Subject: [PATCH] [PowerPC] Implement 32-bit expansion for rldimi rldimi is 64-bit instruction, due to backward compatibility, it needs to be expanded into series of rlwimi in 32-bit environment. In the future, we may improve bit permutation selector and remove such direct codegen. --- clang/lib/Sema/SemaChecking.cpp | 1 - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 109 -- llvm/test/CodeGen/PowerPC/rldimi.ll | 366 3 files changed, 454 insertions(+), 22 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 11401b6f56c0ea..d2cbe5417d682d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: - case PPC::BI__builtin_ppc_rldimi: return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7436b202fba0d9..3281a0dfd08729 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); @@ -10757,6 +10758,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return true; } +static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME) { + assert(SH < 32 && MB < 32 && ME < 32 && + "Invalid argument for rotate insert!"); + return SDValue( + DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, + {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), + DAG.getTargetConstant(MB, Loc, MVT::i32), + DAG.getTargetConstant(ME, Loc, MVT::i32)}), + 0); +} + +static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME, bool IsPPC64) { + assert(SH < 64 && MB < 64 && ME < 64 && + "Invalid argument for rotate insert!"); + if (IsPPC64) { +// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. +if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); +} else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); +} +return SDValue(DAG.getMachineNode( + PPC::RLDIMI, Loc, MVT::i64, + {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), +DAG.getTargetConstant(MB, Loc, MVT::i32)}), + 0); + } + + // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH + // is adjusted to simplify cases. Invalid ranges will be skipped. 
+ // - SrcHi inserted into DstHi with [0, 32-SH) + // - SrcLo inserted into DstHi with [32-SH, 32) + // - SrcHi inserted into DstLo with [32, 64-SH) + // - SrcLo inserted into DstLo with [64-SH, 64) + auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32); + auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32); + if (SH >= 32) { +SH -= 32; +std::swap(SrcLo, SrcHi); + } + auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, + SDValue Src, SDValue Dst, unsigned MB, + unsigned ME) { +if (Left > Right) + return Dst; + +if (MB <= ME) { + if (MB <= Right && ME >= Left) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, + std::max(MB, Left) % 32, + std::min(ME, Right) % 32); +} else { + if (MB < Left || ME > Right) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32); + + if (MB <= Right && ME < Left) +return getRotateInsert32(DAG, Loc, Ds
[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-powerpc Author: Qiu Chaofan (ecnelises) Changes rldimi is 64-bit instruction, due to backward compatibility, it needs to be expanded into series of rlwimi in 32-bit environment. In the future, we may improve bit permutation selector and remove such direct codegen. --- Patch is 20.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86783.diff 3 Files Affected: - (modified) clang/lib/Sema/SemaChecking.cpp (-1) - (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+88-21) - (modified) llvm/test/CodeGen/PowerPC/rldimi.ll (+366) ``diff diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 08449581330934..5e8228ed998978 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: - case PPC::BI__builtin_ppc_rldimi: return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index cce0efad39c75b..7e42773f3aa1cd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); @@ -10748,6 +10749,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return true; } +static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME) { + assert(SH < 32 && MB < 32 && ME < 32 && + "Invalid argument for rotate insert!"); + return SDValue( + DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, + {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), + DAG.getTargetConstant(MB, Loc, MVT::i32), + DAG.getTargetConstant(ME, Loc, MVT::i32)}), + 0); +} + +static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME, bool IsPPC64) { + assert(SH < 64 && MB < 64 && ME < 64 && + "Invalid argument for rotate insert!"); + if (IsPPC64) { +// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. +if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); +} else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); +} +return SDValue(DAG.getMachineNode( + PPC::RLDIMI, Loc, MVT::i64, + {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), +DAG.getTargetConstant(MB, Loc, MVT::i32)}), + 0); + } + + // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH + // is adjusted to simplify cases. Invalid ranges will be skipped. 
+ // - SrcHi inserted into DstHi with [0, 32-SH) + // - SrcLo inserted into DstHi with [32-SH, 32) + // - SrcHi inserted into DstLo with [32, 64-SH) + // - SrcLo inserted into DstLo with [64-SH, 64) + auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32); + auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32); + if (SH >= 32) { +SH -= 32; +std::swap(SrcLo, SrcHi); + } + auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, + SDValue Src, SDValue Dst, unsigned MB, + unsigned ME) { +if (Left > Right) + return Dst; + +if (MB <= ME) { + if (MB <= Right && ME >= Left) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, + std::max(MB, Left) % 32, + std::min(ME, Right) % 32); +} else { + if (MB < Left || ME > Right) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32); + + if (MB <= Right && ME < Left) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, MB % 32, Right % 32); + + if (MB <= Righ
[clang] [llvm] [PowerPC] Implement 32-bit expansion for rldimi (PR #86783)
https://github.com/ecnelises created https://github.com/llvm/llvm-project/pull/86783 rldimi is 64-bit instruction, due to backward compatibility, it needs to be expanded into series of rlwimi in 32-bit environment. In the future, we may improve bit permutation selector and remove such direct codegen. >From 3362a81ca64e5dec6e64e4ed544c30078025db15 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Wed, 27 Mar 2024 17:11:04 +0800 Subject: [PATCH] [PowerPC] Implement 32-bit expansion for rldimi rldimi is 64-bit instruction, due to backward compatibility, it needs to be expanded into series of rlwimi in 32-bit environment. In the future, we may improve bit permutation selector and remove such direct codegen. --- clang/lib/Sema/SemaChecking.cpp | 1 - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 109 -- llvm/test/CodeGen/PowerPC/rldimi.ll | 366 3 files changed, 454 insertions(+), 22 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 08449581330934..5e8228ed998978 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: - case PPC::BI__builtin_ppc_rldimi: return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index cce0efad39c75b..7e42773f3aa1cd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); @@ -10748,6 +10749,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return true; } +static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME) { + assert(SH < 32 && MB < 32 && ME < 32 && + "Invalid argument for rotate insert!"); + return SDValue( + DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, + {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), + DAG.getTargetConstant(MB, Loc, MVT::i32), + DAG.getTargetConstant(ME, Loc, MVT::i32)}), + 0); +} + +static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME, bool IsPPC64) { + assert(SH < 64 && MB < 64 && ME < 64 && + "Invalid argument for rotate insert!"); + if (IsPPC64) { +// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. +if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); +} else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, +DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); +} +return SDValue(DAG.getMachineNode( + PPC::RLDIMI, Loc, MVT::i64, + {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), +DAG.getTargetConstant(MB, Loc, MVT::i32)}), + 0); + } + + // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH + // is adjusted to simplify cases. Invalid ranges will be skipped. 
+ // - SrcHi inserted into DstHi with [0, 32-SH) + // - SrcLo inserted into DstHi with [32-SH, 32) + // - SrcHi inserted into DstLo with [32, 64-SH) + // - SrcLo inserted into DstLo with [64-SH, 64) + auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32); + auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32); + if (SH >= 32) { +SH -= 32; +std::swap(SrcLo, SrcHi); + } + auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, + SDValue Src, SDValue Dst, unsigned MB, + unsigned ME) { +if (Left > Right) + return Dst; + +if (MB <= ME) { + if (MB <= Right && ME >= Left) +return getRotateInsert32(DAG, Loc, Dst, Src, SH, + std::max(MB, Left) % 32, + std::min(ME, Right) % 32)